bpo-47000: Make `io.text_encoding()` respects UTF-8 mode (GH-32003) · python/cpython@4216dce · GitHub
[go: up one dir, main page]

Skip to content

Commit 4216dce

Browse files
bpo-47000: Make io.text_encoding() respects UTF-8 mode (GH-32003)
Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
1 parent 6db2db9 commit 4216dce

File tree

10 files changed

+52
-17
lines changed

10 files changed

+52
-17
lines changed

Doc/library/io.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,12 +198,13 @@ High-level Module Interface
198198
This is a helper function for callables that use :func:`open` or
199199
:class:`TextIOWrapper` and have an ``encoding=None`` parameter.
200200

201-
This function returns *encoding* if it is not ``None`` and ``"locale"`` if
202-
*encoding* is ``None``.
201+
This function returns *encoding* if it is not ``None``.
202+
Otherwise, it returns ``"locale"`` or ``"utf-8"`` depending on
203+
:ref:`UTF-8 Mode <utf8-mode>`.
203204

204205
This function emits an :class:`EncodingWarning` if
205206
:data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
206-
is None. *stacklevel* specifies where the warning is emitted.
207+
is ``None``. *stacklevel* specifies where the warning is emitted.
207208
For example::
208209

209210
def read_text(path, encoding=None):
@@ -218,6 +219,10 @@ High-level Module Interface
218219

219220
.. versionadded:: 3.10
220221

222+
.. versionchanged:: 3.11
223+
:func:`text_encoding` returns "utf-8" when UTF-8 mode is enabled and
224+
*encoding* is ``None``.
225+
221226

222227
.. exception:: BlockingIOError
223228

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ struct _Py_global_strings {
4848
STRUCT_FOR_STR(newline, "\n")
4949
STRUCT_FOR_STR(open_br, "{")
5050
STRUCT_FOR_STR(percent, "%")
51+
STRUCT_FOR_STR(utf_8, "utf-8")
5152
} literals;
5253

5354
struct {

Include/internal/pycore_runtime_init.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,7 @@ extern "C" {
672672
INIT_STR(newline, "\n"), \
673673
INIT_STR(open_br, "{"), \
674674
INIT_STR(percent, "%"), \
675+
INIT_STR(utf_8, "utf-8"), \
675676
}, \
676677
.identifiers = { \
677678
INIT_ID(False), \

Lib/_pyio.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ def text_encoding(encoding, stacklevel=2):
4444
"""
4545
A helper function to choose the text encoding.
4646
47-
When encoding is not None, just return it.
48-
Otherwise, return the default text encoding (i.e. "locale").
47+
When encoding is not None, this function returns it.
48+
Otherwise, this function returns the default text encoding
49+
(i.e. "locale" or "utf-8" depends on UTF-8 mode).
4950
5051
This function emits an EncodingWarning if *encoding* is None and
5152
sys.flags.warn_default_encoding is true.
@@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2):
5556
However, please consider using encoding="utf-8" for new APIs.
5657
"""
5758
if encoding is None:
58-
encoding = "locale"
59+
if sys.flags.utf8_mode:
60+
encoding = "utf-8"
61+
else:
62+
encoding = "locale"
5963
if sys.flags.warn_default_encoding:
6064
import warnings
6165
warnings.warn("'encoding' argument not specified.",

Lib/test/test_io.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4289,6 +4289,17 @@ def test_check_encoding_warning(self):
42894289
self.assertTrue(
42904290
warnings[1].startswith(b"<string>:8: EncodingWarning: "))
42914291

4292+
def test_text_encoding(self):
4293+
# PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8"
4294+
# based on sys.flags.utf8_mode
4295+
code = "import io; print(io.text_encoding(None))"
4296+
4297+
proc = assert_python_ok('-X', 'utf8=0', '-c', code)
4298+
self.assertEqual(b"locale", proc.out.strip())
4299+
4300+
proc = assert_python_ok('-X', 'utf8=1', '-c', code)
4301+
self.assertEqual(b"utf-8", proc.out.strip())
4302+
42924303
@support.cpython_only
42934304
# Depending if OpenWrapper was already created or not, the warning is
42944305
# emitted or not. For example, the attribute is already created when this

Lib/test/test_utf8_mode.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def test_io(self):
161161
filename = __file__
162162

163163
out = self.get_output('-c', code, filename, PYTHONUTF8='1')
164-
self.assertEqual(out, 'UTF-8/strict')
164+
self.assertEqual(out.lower(), 'utf-8/strict')
165165

166166
def _check_io_encoding(self, module, encoding=None, errors=None):
167167
filename = __file__
@@ -183,10 +183,10 @@ def _check_io_encoding(self, module, encoding=None, errors=None):
183183
PYTHONUTF8='1')
184184

185185
if not encoding:
186-
encoding = 'UTF-8'
186+
encoding = 'utf-8'
187187
if not errors:
188188
errors = 'strict'
189-
self.assertEqual(out, f'{encoding}/{errors}')
189+
self.assertEqual(out.lower(), f'{encoding}/{errors}')
190190

191191
def check_io_encoding(self, module):
192192
self._check_io_encoding(module, encoding="latin1")
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Make :func:`io.text_encoding` returns "utf-8" when UTF-8 mode is enabled.

Modules/_io/_iomodule.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,9 @@ _io.text_encoding
457457
458458
A helper function to choose the text encoding.
459459
460-
When encoding is not None, just return it.
461-
Otherwise, return the default text encoding (i.e. "locale").
460+
When encoding is not None, this function returns it.
461+
Otherwise, this function returns the default text encoding
462+
(i.e. "locale" or "utf-8" depends on UTF-8 mode).
462463
463464
This function emits an EncodingWarning if encoding is None and
464465
sys.flags.warn_default_encoding is true.
@@ -469,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs.
469470

470471
static PyObject *
471472
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
472-
/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
473+
/*[clinic end generated code: output=91b2cfea6934cc0c input=4999aa8b3d90f3d4]*/
473474
{
474475
if (encoding == NULL || encoding == Py_None) {
475476
PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -479,7 +480,14 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
479480
return NULL;
480481
}
481482
}
482-
return &_Py_ID(locale);
483+
const PyPreConfig *preconfig = &_PyRuntime.preconfig;
484+
if (preconfig->utf8_mode) {
485+
_Py_DECLARE_STR(utf_8, "utf-8");
486+
encoding = &_Py_STR(utf_8);
487+
}
488+
else {
489+
encoding = &_Py_ID(locale);
490+
}
483491
}
484492
Py_INCREF(encoding);
485493
return encoding;

Modules/_io/clinic/_iomodule.c.h

Lines changed: 4 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/sysmodule.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,10 @@ static PyObject *
841841
sys_getdefaultencoding_impl(PyObject *module)
842842
/*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/
843843
{
844-
return PyUnicode_FromString(PyUnicode_GetDefaultEncoding());
844+
_Py_DECLARE_STR(utf_8, "utf-8");
845+
PyObject *ret = &_Py_STR(utf_8);
846+
Py_INCREF(ret);
847+
return ret;
845848
}
846849

847850
/*[clinic input]

0 commit comments

Comments
 (0)
0