From 96eeaebecdf1a3a131fb325938692c35e8af07ab Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 20 Mar 2022 13:00:20 +0900 Subject: [PATCH 01/11] bpo-47000: Make io.text_encoding() respects UTF-8 mode. --- Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_runtime_init.h | 1 + Lib/_pyio.py | 8 ++++++-- Lib/test/test_io.py | 11 +++++++++++ .../Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst | 1 + Modules/_io/_iomodule.c | 11 +++++++++-- Python/sysmodule.c | 3 ++- 7 files changed, 31 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 35bffa7aff9493..6cf1f20a08a28c 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -48,6 +48,7 @@ struct _Py_global_strings { STRUCT_FOR_STR(newline, "\n") STRUCT_FOR_STR(open_br, "{") STRUCT_FOR_STR(percent, "%") + STRUCT_FOR_STR(utf_8, "utf-8") } literals; struct { diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 20d543a8cbc565..99ad76362c5251 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -672,6 +672,7 @@ extern "C" { INIT_STR(newline, "\n"), \ INIT_STR(open_br, "{"), \ INIT_STR(percent, "%"), \ + INIT_STR(utf_8, "utf-8"), \ }, \ .identifiers = { \ INIT_ID(False), \ diff --git a/Lib/_pyio.py b/Lib/_pyio.py index fd00d6536c0768..ca2b09258b0eeb 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -45,7 +45,8 @@ def text_encoding(encoding, stacklevel=2): A helper function to choose the text encoding. When encoding is not None, just return it. - Otherwise, return the default text encoding (i.e. "locale"). + Otherwise, return the default text encoding (i.e. "locale", or "utf-8" + if UTF-8 mode is enabled). 
This function emits an EncodingWarning if *encoding* is None and sys.flags.warn_default_encoding is true. @@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2): However, please consider using encoding="utf-8" for new APIs. """ if encoding is None: - encoding = "locale" + if sys.flags.utf8_mode: + encoding = "utf-8" + else: + encoding = "locale" if sys.flags.warn_default_encoding: import warnings warnings.warn("'encoding' argument not specified.", diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 2d0ca878788f26..67be108d2526f7 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4289,6 +4289,17 @@ def test_check_encoding_warning(self): self.assertTrue( warnings[1].startswith(b":8: EncodingWarning: ")) + def test_text_encoding(self): + # PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8" + # based on sys.flags.utf8_mode + code = "import io; print(io.text_encoding(None))" + + proc = assert_python_ok('-X', 'utf8=0', '-c', code) + self.assertEqual(b"locale", proc.out.strip()) + + proc = assert_python_ok('-X', 'utf8=1', '-c', code) + self.assertEqual(b"utf-8", proc.out.strip()) + @support.cpython_only # Depending if OpenWrapper was already created or not, the warning is # emitted or not. For example, the attribute is already created when this diff --git a/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst b/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst new file mode 100644 index 00000000000000..f96b6e627ed116 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst @@ -0,0 +1 @@ +Make :func:`io.text_encoding` returns "utf-8" when UTF-8 mode is enabled. diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 7f029f26078b80..85d5af61edffb4 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -458,7 +458,8 @@ _io.text_encoding A helper function to choose the text encoding. When encoding is not None, just return it. 
-Otherwise, return the default text encoding (i.e. "locale"). +Otherwise, return the default text encoding (i.e. "locale", or "utf-8" +if UTF-8 mode is enabled). This function emits an EncodingWarning if encoding is None and sys.flags.warn_default_encoding is true. @@ -479,7 +480,13 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) return NULL; } } - return &_Py_ID(locale); + const PyPreConfig *preconfig = &_PyRuntime.preconfig; + if (preconfig->utf8_mode) { + return &_Py_STR(utf_8); + } + else { + return &_Py_ID(locale); + } } Py_INCREF(encoding); return encoding; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 99540b09c1f465..7498f617865644 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -841,7 +841,8 @@ static PyObject * sys_getdefaultencoding_impl(PyObject *module) /*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/ { - return PyUnicode_FromString(PyUnicode_GetDefaultEncoding()); + _Py_DECLARE_STR(utf_8, "utf-8"); + return &_Py_STR(utf_8); } /*[clinic input] From b8f751f9c214104826d590a8b705a1081737eacb Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 20 Mar 2022 14:17:06 +0900 Subject: [PATCH 02/11] Fix refcounting --- Modules/_io/_iomodule.c | 4 ++-- Python/sysmodule.c | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 85d5af61edffb4..3757a8521adb20 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -482,10 +482,10 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) } const PyPreConfig *preconfig = &_PyRuntime.preconfig; if (preconfig->utf8_mode) { - return &_Py_STR(utf_8); + encoding = &_Py_STR(utf_8); } else { - return &_Py_ID(locale); + encoding = &_Py_ID(locale); } } Py_INCREF(encoding); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 7498f617865644..9ec49f949b79ad 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -842,7 +842,9 
@@ sys_getdefaultencoding_impl(PyObject *module) /*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/ { _Py_DECLARE_STR(utf_8, "utf-8"); - return &_Py_STR(utf_8); + PyObject *ret = &_Py_STR(utf_8); + Py_INCREF(ret); + return ret; } /*[clinic input] From bafa2635fa08917661510d96e8db5b97a0edcec9 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 20 Mar 2022 14:50:49 +0900 Subject: [PATCH 03/11] make clinic --- Modules/_io/_iomodule.c | 2 +- Modules/_io/clinic/_iomodule.c.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 3757a8521adb20..ecaa156e1d46ac 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -470,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs. static PyObject * _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) -/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/ +/*[clinic end generated code: output=91b2cfea6934cc0c input=350c198cb6b0d25e]*/ { if (encoding == NULL || encoding == Py_None) { PyInterpreterState *interp = _PyInterpreterState_GET(); diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index d5fb176eb66be8..d8b85c8fe3352d 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -274,7 +274,8 @@ PyDoc_STRVAR(_io_text_encoding__doc__, "A helper function to choose the text encoding.\n" "\n" "When encoding is not None, just return it.\n" -"Otherwise, return the default text encoding (i.e. \"locale\").\n" +"Otherwise, return the default text encoding (i.e. 
\"locale\", or \"utf-8\"\n" +"if UTF-8 mode is enabled).\n" "\n" "This function emits an EncodingWarning if encoding is None and\n" "sys.flags.warn_default_encoding is true.\n" @@ -354,4 +355,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=6ea315343f6a94ba input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5492a6512a3d7db0 input=a9049054013a1b77]*/ From de5867ffbb36f792c90a872a768df1b81b957915 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 23 Mar 2022 09:23:36 +0900 Subject: [PATCH 04/11] Update Modules/_io/_iomodule.c Co-authored-by: Eric Snow --- Modules/_io/_iomodule.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index ecaa156e1d46ac..7b13c734a683db 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -482,6 +482,7 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) } const PyPreConfig *preconfig = &_PyRuntime.preconfig; if (preconfig->utf8_mode) { + _Py_DECLARE_STR(utf_8, "utf-8"); encoding = &_Py_STR(utf_8); } else { From 52045fcb0d9bb6cade275089c3052acfaf38d6b6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 31 Mar 2022 13:45:19 +0900 Subject: [PATCH 05/11] Update Modules/_io/_iomodule.c Co-authored-by: Eric Snow --- Modules/_io/_iomodule.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 7b13c734a683db..4e8ca01c77fdab 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -482,6 +482,7 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) } const PyPreConfig *preconfig = &_PyRuntime.preconfig; if (preconfig->utf8_mode) { + _Py_DECLARE_STR(utf_8, "utf-8"); _Py_DECLARE_STR(utf_8, "utf-8"); encoding = &_Py_STR(utf_8); } From 37f640acfce149004ca0d2c2457ea1d11f591dc3 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 31 Mar 2022 14:29:29 
+0900 Subject: [PATCH 06/11] test: ignore utf-8 vs UTF-8 --- Lib/test/test_utf8_mode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 2b96f76df305fe..308e8e8aea6c22 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -161,7 +161,7 @@ def test_io(self): filename = __file__ out = self.get_output('-c', code, filename, PYTHONUTF8='1') - self.assertEqual(out, 'UTF-8/strict') + self.assertEqual(out.lower(), 'utf-8/strict') def _check_io_encoding(self, module, encoding=None, errors=None): filename = __file__ @@ -183,10 +183,10 @@ def _check_io_encoding(self, module, encoding=None, errors=None): PYTHONUTF8='1') if not encoding: - encoding = 'UTF-8' + encoding = 'utf-8' if not errors: errors = 'strict' - self.assertEqual(out, f'{encoding}/{errors}') + self.assertEqual(out.lower(), f'{encoding}/{errors}') def check_io_encoding(self, module): self._check_io_encoding(module, encoding="latin1") From addff879d7a863c4897260bb4b8d0ee387bbb69c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 31 Mar 2022 15:23:17 +0900 Subject: [PATCH 07/11] Update _iomodule.c --- Modules/_io/_iomodule.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 4e8ca01c77fdab..7b13c734a683db 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -482,7 +482,6 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) } const PyPreConfig *preconfig = &_PyRuntime.preconfig; if (preconfig->utf8_mode) { - _Py_DECLARE_STR(utf_8, "utf-8"); _Py_DECLARE_STR(utf_8, "utf-8"); encoding = &_Py_STR(utf_8); } From 11c614c74d32e933185c899e0b6fdf26a69b8c41 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 3 Apr 2022 13:25:59 +0900 Subject: [PATCH 08/11] Update the doc --- Doc/library/io.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 
d5123348195bd9..7b3c5daaebde6a 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -198,12 +198,13 @@ High-level Module Interface This is a helper function for callables that use :func:`open` or :class:`TextIOWrapper` and have an ``encoding=None`` parameter. - This function returns *encoding* if it is not ``None`` and ``"locale"`` if - *encoding* is ``None``. + This function returns *encoding* if it is not ``None``. + Otherwise, it returns ``"locale"`` or ``"utf-8"`` depending on + :ref:`UTF-8 Mode <utf8-mode>`. This function emits an :class:`EncodingWarning` if :data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding* - is None. *stacklevel* specifies where the warning is emitted. + is ``None``. *stacklevel* specifies where the warning is emitted. For example:: def read_text(path, encoding=None): @@ -217,6 +218,10 @@ High-level Module Interface See :ref:`io-text-encoding` for more information. .. versionadded:: 3.10 + + .. versionchanged:: 3.11 + :func:`text_encoding` returns "utf-8" when UTF-8 mode is enabled and + *encoding* is ``None``. .. exception:: BlockingIOError From e6f46fa1cb8ad760cd200e853e5a7bc54bfc8c6c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 3 Apr 2022 13:33:03 +0900 Subject: [PATCH 09/11] Remove trailing spaces. --- Doc/library/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 7b3c5daaebde6a..80107d539505c0 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -218,7 +218,7 @@ High-level Module Interface See :ref:`io-text-encoding` for more information. .. versionadded:: 3.10 - + .. versionchanged:: 3.11 :func:`text_encoding` returns "utf-8" when UTF-8 mode is enabled and *encoding* is ``None``. 
From ff8eb2796c69b1ad726c177a3a7235db81753745 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 3 Apr 2022 21:44:44 +0900 Subject: [PATCH 10/11] Update docstring --- Lib/_pyio.py | 6 +++--- Modules/_io/_iomodule.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index ca2b09258b0eeb..e3ff59eb1adb19 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -44,9 +44,9 @@ def text_encoding(encoding, stacklevel=2): """ A helper function to choose the text encoding. - When encoding is not None, just return it. - Otherwise, return the default text encoding (i.e. "locale", or "utf-8" - if UTF-8 mode is enabled). + When encoding is not None, this function returns it. + Otherwise, this function returns the default text encoding + (i.e. "locale" or "utf-8" depends on UTF-8 mode). This function emits an EncodingWarning if *encoding* is None and sys.flags.warn_default_encoding is true. diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 7b13c734a683db..d196c2849f534b 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -457,9 +457,9 @@ _io.text_encoding A helper function to choose the text encoding. -When encoding is not None, just return it. -Otherwise, return the default text encoding (i.e. "locale", or "utf-8" -if UTF-8 mode is enabled). +When encoding is not None, this function returns it. +Otherwise, this function returns the default text encoding +(i.e. "locale" or "utf-8" depends on UTF-8 mode). This function emits an EncodingWarning if encoding is None and sys.flags.warn_default_encoding is true. 
From ce48c4a868e5614fce3e64b872fa5cd1fc6f9832 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 3 Apr 2022 22:00:33 +0900 Subject: [PATCH 11/11] make clinic --- Modules/_io/_iomodule.c | 2 +- Modules/_io/clinic/_iomodule.c.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index d196c2849f534b..065f5e29c315bf 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -470,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs. static PyObject * _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) -/*[clinic end generated code: output=91b2cfea6934cc0c input=350c198cb6b0d25e]*/ +/*[clinic end generated code: output=91b2cfea6934cc0c input=4999aa8b3d90f3d4]*/ { if (encoding == NULL || encoding == Py_None) { PyInterpreterState *interp = _PyInterpreterState_GET(); diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index d8b85c8fe3352d..e4a6b8c42e1d84 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -273,9 +273,9 @@ PyDoc_STRVAR(_io_text_encoding__doc__, "\n" "A helper function to choose the text encoding.\n" "\n" -"When encoding is not None, just return it.\n" -"Otherwise, return the default text encoding (i.e. \"locale\", or \"utf-8\"\n" -"if UTF-8 mode is enabled).\n" +"When encoding is not None, this function returns it.\n" +"Otherwise, this function returns the default text encoding\n" +"(i.e. \"locale\" or \"utf-8\" depends on UTF-8 mode).\n" "\n" "This function emits an EncodingWarning if encoding is None and\n" "sys.flags.warn_default_encoding is true.\n" @@ -355,4 +355,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=5492a6512a3d7db0 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1a7fd7755c9a9609 input=a9049054013a1b77]*/