From 655759b8b6ad2db338fdd53b140b2cc987329a14 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 27 Apr 2025 15:21:49 +0900 Subject: [PATCH 1/9] gh-133036: deprecate codecs.open --- Doc/library/codecs.rst | 4 +++ Doc/whatsnew/3.14.rst | 4 +++ Lib/codecs.py | 6 ++-- Lib/test/test_codecs.py | 32 ++++++++++++------- ...-04-27-15-21-05.gh-issue-133036.HCNYA7.rst | 2 ++ 5 files changed, 34 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index f63148a9bd2a31..14f6547e4e0522 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -208,6 +208,10 @@ wider range of codecs when working with binary files: .. versionchanged:: 3.11 The ``'U'`` mode has been removed. + .. deprecated:: next + + :func:`codecs.open` has been superseded by :func:`open`. + .. function:: EncodedFile(file, data_encoding, file_encoding=None, errors='strict') diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index e9abb41cfd5251..074f37b8ec2d03 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1575,6 +1575,10 @@ Deprecated as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`codecs`: + :func:`codecs.open` is now deprecated. Use :func:`open` instead. + (Contributed by Inada Naoki in :gh:`133036`.) + * :mod:`functools`: Calling the Python implementation of :func:`functools.reduce` with *function* or *sequence* as keyword arguments is now deprecated. diff --git a/Lib/codecs.py b/Lib/codecs.py index e365e6cf22929f..fc38e922257644 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -884,7 +884,6 @@ def __reduce_ex__(self, proto): ### Shortcuts def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): - """ Open an encoded file using the given mode and return a wrapped version providing transparent encoding/decoding. @@ -912,8 +911,11 @@ def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): .encoding which allows querying the used encoding. This attribute is only available if an encoding was specified as parameter. - """ + import warnings + warnings.warn("codecs.open() is deprecated. Use open() instead.", + DeprecationWarning, stacklevel=2) + if encoding is not None and \ 'b' not in mode: # Force opening of the file in binary mode diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e51f7e0ee12b1f..823bb89d8a03f2 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -7,6 +7,7 @@ import unittest import encodings from unittest import mock +import warnings from test import support from test.support import os_helper @@ -28,6 +29,13 @@ else: SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar) +def codecs_open_nowarn(filename, mode='r', encoding=None, errors='strict', buffering=-1): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return codecs.open( + filename, mode, encoding=encoding, errors=errors, + buffering=buffering) + def coding_checker(self, coder): def check(input, expect): self.assertEqual(coder(input), (expect, len(input))) @@ -719,19 +727,19 @@ def test_bug691291(self): self.addCleanup(os_helper.unlink, os_helper.TESTFN) with open(os_helper.TESTFN, 'wb') as fp: fp.write(s) - with codecs.open(os_helper.TESTFN, 'r', + with codecs_open_nowarn(os_helper.TESTFN, 'r', encoding=self.encoding) as reader: self.assertEqual(reader.read(), s1) def test_invalid_modes(self): for mode in ('U', 'rU', 'r+U'): with self.assertRaises(ValueError) as cm: - codecs.open(os_helper.TESTFN, mode, encoding=self.encoding) + codecs_open_nowarn(os_helper.TESTFN, mode, encoding=self.encoding) self.assertIn('invalid mode', str(cm.exception)) for mode in ('rt', 'wt', 'at', 'r+t'): with self.assertRaises(ValueError) as cm: - codecs.open(os_helper.TESTFN, mode, encoding=self.encoding) + codecs_open_nowarn(os_helper.TESTFN, mode, encoding=self.encoding) self.assertIn("can't have text and binary mode at once", str(cm.exception)) @@ -1844,9 +1852,9 @@ def test_all(self): def test_open(self): self.addCleanup(os_helper.unlink, os_helper.TESTFN) for mode in ('w', 'r', 'r+', 'w+', 'a', 'a+'): - with self.subTest(mode), \ - codecs.open(os_helper.TESTFN, mode, 'ascii') as file: - self.assertIsInstance(file, codecs.StreamReaderWriter) + with self.subTest(mode), self.assertWarns(DeprecationWarning): + with codecs.open(os_helper.TESTFN, mode, 'ascii') as file: + self.assertIsInstance(file, codecs.StreamReaderWriter) def test_undefined(self): self.assertRaises(UnicodeError, codecs.encode, 'abc', 'undefined') @@ -1863,7 +1871,7 @@ def test_file_closes_if_lookup_error_raised(self): mock_open = mock.mock_open() with mock.patch('builtins.open', mock_open) as file: with self.assertRaises(LookupError): - codecs.open(os_helper.TESTFN, 'wt', 'invalid-encoding') + codecs_open_nowarn(os_helper.TESTFN, 'wt', 'invalid-encoding') file().close.assert_called() @@ -2883,7 +2891,7 @@ def test_seek0(self): self.addCleanup(os_helper.unlink, os_helper.TESTFN) for encoding in tests: # Check if the BOM is written only once - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.write(data) f.write(data) f.seek(0) @@ -2892,7 +2900,7 @@ def test_seek0(self): self.assertEqual(f.read(), data * 2) # Check that the BOM is written after a seek(0) - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.write(data[0]) self.assertNotEqual(f.tell(), 0) f.seek(0) @@ -2901,7 +2909,7 @@ def test_seek0(self): self.assertEqual(f.read(), data) # (StreamWriter) Check that the BOM is written after a seek(0) - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.writer.write(data[0]) self.assertNotEqual(f.writer.tell(), 0) f.writer.seek(0) @@ -2911,7 +2919,7 @@ def test_seek0(self): # Check that the BOM is not written after a seek() at a position # different than the start - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.write(data) f.seek(f.tell()) f.write(data) @@ -2920,7 +2928,7 @@ def test_seek0(self): # (StreamWriter) Check that the BOM is not written after a seek() # at a position different than the start - with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.writer.write(data) f.writer.seek(f.writer.tell()) f.writer.write(data) diff --git a/Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst b/Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst new file mode 100644 index 00000000000000..46b1f5575d0d2e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-27-15-21-05.gh-issue-133036.HCNYA7.rst @@ -0,0 +1,2 @@ +:func:`codecs.open` is now deprecated. Use :func:`open` instead. Contributed +by Inada Naoki. From 47ebf4f4e203b5e772dfddbfe4efef3f9b384bfb Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 28 Apr 2025 13:35:15 +0900 Subject: [PATCH 2/9] codecs_open_nowarn -> codecs_open_no_warn --- Lib/test/test_codecs.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 823bb89d8a03f2..a35de8ad87c3cd 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -21,20 +21,12 @@ except ImportError: _testinternalcapi = None -try: - import ctypes -except ImportError: - ctypes = None - SIZEOF_WCHAR_T = -1 -else: - SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar) -def codecs_open_nowarn(filename, mode='r', encoding=None, errors='strict', buffering=-1): +def codecs_open_no_warn(*args, **kwargs): + """call codecs.open(*args, **kwargs) ignoring DeprecationWarning""" with warnings.catch_warnings(): warnings.simplefilter("ignore") - return codecs.open( - filename, mode, encoding=encoding, errors=errors, - buffering=buffering) + return codecs.open(*args, **kwargs) def coding_checker(self, coder): def check(input, expect): @@ -727,19 +719,19 @@ def test_bug691291(self): self.addCleanup(os_helper.unlink, os_helper.TESTFN) with open(os_helper.TESTFN, 'wb') as fp: fp.write(s) - with codecs_open_nowarn(os_helper.TESTFN, 'r', + with codecs_open_no_warn(os_helper.TESTFN, 'r', encoding=self.encoding) as reader: self.assertEqual(reader.read(), s1) def test_invalid_modes(self): for mode in ('U', 'rU', 'r+U'): with self.assertRaises(ValueError) as cm: - codecs_open_nowarn(os_helper.TESTFN, mode, encoding=self.encoding) + codecs_open_no_warn(os_helper.TESTFN, mode, encoding=self.encoding) self.assertIn('invalid mode', str(cm.exception)) for mode in ('rt', 'wt', 'at', 'r+t'): with self.assertRaises(ValueError) as cm: - codecs_open_nowarn(os_helper.TESTFN, mode, encoding=self.encoding) + codecs_open_no_warn(os_helper.TESTFN, mode, encoding=self.encoding) self.assertIn("can't have text and binary mode at once", str(cm.exception)) @@ -1871,7 +1863,7 @@ def test_file_closes_if_lookup_error_raised(self): mock_open = mock.mock_open() with mock.patch('builtins.open', mock_open) as file: with self.assertRaises(LookupError): - codecs_open_nowarn(os_helper.TESTFN, 'wt', 'invalid-encoding') + codecs_open_no_warn(os_helper.TESTFN, 'wt', 'invalid-encoding') file().close.assert_called() @@ -2891,7 +2883,7 @@ def test_seek0(self): self.addCleanup(os_helper.unlink, os_helper.TESTFN) for encoding in tests: # Check if the BOM is written only once - with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.write(data) f.write(data) f.seek(0) @@ -2900,7 +2892,7 @@ def test_seek0(self): self.assertEqual(f.read(), data * 2) # Check that the BOM is written after a seek(0) - with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.write(data[0]) self.assertNotEqual(f.tell(), 0) f.seek(0) @@ -2909,7 +2901,7 @@ def test_seek0(self): self.assertEqual(f.read(), data) # (StreamWriter) Check that the BOM is written after a seek(0) - with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.writer.write(data[0]) self.assertNotEqual(f.writer.tell(), 0) f.writer.seek(0) @@ -2919,7 +2911,7 @@ def test_seek0(self): # Check that the BOM is not written after a seek() at a position # different than the start - with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.write(data) f.seek(f.tell()) f.write(data) @@ -2928,7 +2920,7 @@ def test_seek0(self): # (StreamWriter) Check that the BOM is not written after a seek() # at a position different than the start - with codecs_open_nowarn(os_helper.TESTFN, 'w+', encoding=encoding) as f: + with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f: f.writer.write(data) f.writer.seek(f.writer.tell()) f.writer.write(data) From ff0d25d9d14c4a476fd44d41b05d7545eead88fe Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 28 Apr 2025 16:39:28 +0900 Subject: [PATCH 3/9] fix tests --- Lib/test/test_multibytecodec.py | 3 ++- Lib/test/test_sax.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index 1b55f1e70b32f5..d7a233377bdb02 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -314,7 +314,8 @@ def test_bug1728403(self): f.write(b'\xa1') finally: f.close() - f = codecs.open(TESTFN, encoding='cp949') + with self.assertWarns(DeprecationWarning): + f = codecs.open(TESTFN, encoding='cp949') try: self.assertRaises(UnicodeDecodeError, f.read, 2) finally: diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 0d0f86c145b499..5c10bcedc69bc6 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -1,5 +1,4 @@ # regression test for SAX 2.0 -# $Id$ from xml.sax import make_parser, ContentHandler, \ SAXException, SAXReaderNotAvailable, SAXParseException @@ -832,8 +831,9 @@ class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase): fname = os_helper.TESTFN + '-codecs' def ioclass(self): - writer = codecs.open(self.fname, 'w', encoding='ascii', - errors='xmlcharrefreplace', buffering=0) + with self.assertWarns(DeprecationWarning): + writer = codecs.open(self.fname, 'w', encoding='ascii', + errors='xmlcharrefreplace', buffering=0) def cleanup(): writer.close() os_helper.unlink(self.fname) From cba4e03829d96bd1f91b66e148c96a22c170a7f5 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 28 Apr 2025 18:07:57 +0900 Subject: [PATCH 4/9] remove codecs.open from error message --- Lib/_pyio.py | 3 +-- Modules/_io/textio.c | 5 ++--- Python/codecs.c | 18 +++++++++++++----- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index b875103bee441c..a870de5b532542 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -2056,8 +2056,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None, raise ValueError("invalid encoding: %r" % encoding) if not codecs.lookup(encoding)._is_text_encoding: - msg = ("%r is not a text encoding; " - "use codecs.open() to handle arbitrary codecs") + msg = "%r is not a text encoding" raise LookupError(msg % encoding) if errors is None: diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index e77d8448310fba..a5b2ca7240a55f 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1185,7 +1185,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, } /* Check we have been asked for a real text encoding */ - codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()"); + codec_info = _PyCodec_LookupTextEncoding(encoding, NULL); if (codec_info == NULL) { Py_CLEAR(self->encoding); goto error; @@ -1324,8 +1324,7 @@ textiowrapper_change_encoding(textio *self, PyObject *encoding, } // Create new encoder & decoder - PyObject *codec_info = _PyCodec_LookupTextEncoding( - c_encoding, "codecs.open()"); + PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL); if (codec_info == NULL) { Py_DECREF(encoding); Py_DECREF(errors); diff --git a/Python/codecs.c b/Python/codecs.c index 265f5214e5bad2..caf8d9d5f3c188 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -540,11 +540,19 @@ PyObject * _PyCodec_LookupTextEncoding(const char *encoding, Py_DECREF(attr); if (is_text_codec <= 0) { Py_DECREF(codec); - if (!is_text_codec) - PyErr_Format(PyExc_LookupError, - "'%.400s' is not a text encoding; " - "use %s to handle arbitrary codecs", - encoding, alternate_command); + if (!is_text_codec) { + if (alternate_command != NULL) { + PyErr_Format(PyExc_LookupError, + "'%.400s' is not a text encoding; " + "use %s to handle arbitrary codecs", + encoding, alternate_command); + } + else { + PyErr_Format(PyExc_LookupError, + "'%.400s' is not a text encoding", + encoding); + } + } return NULL; } } From c2a52e390f092ca051a465ce87aaaccf961c0b8e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 28 Apr 2025 21:06:37 +0900 Subject: [PATCH 5/9] fix windows test --- Lib/test/test_codecs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index a35de8ad87c3cd..28039ae6cdd368 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -35,13 +35,13 @@ def check(input, expect): # On small versions of Windows like Windows IoT or Windows Nano Server not all codepages are present def is_code_page_present(cp): - from ctypes import POINTER, WINFUNCTYPE, WinDLL + from ctypes import POINTER, WINFUNCTYPE, WinDLL, Structure from ctypes.wintypes import BOOL, BYTE, WCHAR, UINT, DWORD MAX_LEADBYTES = 12 # 5 ranges, 2 bytes ea., 0 term. MAX_DEFAULTCHAR = 2 # single or double byte MAX_PATH = 260 - class CPINFOEXW(ctypes.Structure): + class CPINFOEXW(Structure): _fields_ = [("MaxCharSize", UINT), ("DefaultChar", BYTE*MAX_DEFAULTCHAR), ("LeadByte", BYTE*MAX_LEADBYTES), From 744eff063a04e837a6dc4df224a9d2e405534f28 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 29 Apr 2025 10:12:36 +0900 Subject: [PATCH 6/9] add pending-removal-in-future entry --- Doc/deprecations/pending-removal-in-future.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index 24e8853da90621..d60717af652b44 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -49,6 +49,8 @@ although there is currently no date scheduled for their removal. :data:`calendar.FEBRUARY`. (Contributed by Prince Roshan in :gh:`103636`.) +* :func:`codecs.open`: use the :func:`open` instead. (:gh:`133038`) + * :attr:`codeobject.co_lnotab`: use the :meth:`codeobject.co_lines` method instead. From b2dced556fd4c6bab1838a2f15f0ff4fd5a2af19 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 29 Apr 2025 17:23:44 +0900 Subject: [PATCH 7/9] Apply suggestions from code review Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/deprecations/pending-removal-in-future.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index d60717af652b44..977a1cc7709d7f 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -49,7 +49,7 @@ although there is currently no date scheduled for their removal. :data:`calendar.FEBRUARY`. (Contributed by Prince Roshan in :gh:`103636`.) -* :func:`codecs.open`: use the :func:`open` instead. (:gh:`133038`) +* :mod:`codecs`: use :func:`open` instead of :func:`codecs.open`. (:gh:`133038`) * :attr:`codeobject.co_lnotab`: use the :meth:`codeobject.co_lines` method instead. From c8af536acd65403e68d7a891493e36022557b7f4 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 29 Apr 2025 18:10:49 +0900 Subject: [PATCH 8/9] Update Doc/deprecations/pending-removal-in-future.rst Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> From ce5c94b0f150b9e2b6adbb96736eab05f9d18949 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 29 Apr 2025 20:50:01 +0900 Subject: [PATCH 9/9] Update Lib/test/test_codecs.py Co-authored-by: Victor Stinner --- Lib/test/test_codecs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 28039ae6cdd368..94fcf98e75721f 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -23,7 +23,7 @@ def codecs_open_no_warn(*args, **kwargs): - """call codecs.open(*args, **kwargs) ignoring DeprecationWarning""" + """Call codecs.open(*args, **kwargs) ignoring DeprecationWarning.""" with warnings.catch_warnings(): warnings.simplefilter("ignore") return codecs.open(*args, **kwargs)