From 5b924dc728ea6c8a49014d35326e07a88e7205f8 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Fri, 24 May 2024 16:34:53 +0300 Subject: [PATCH 01/13] gh-119506: fix `_io.TextIOWrapper.write()` write during flush * check if call to `_textiowrapper_writeflush()` has left any data in `self->pending_bytes`. If so, store them in `self->pending_bytes` after `b` * add test --- Lib/test/test_io.py | 16 ++++++++++++++++ Modules/_io/textio.c | 17 ++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index e5cb08c2cdd04c..c6910990b3e810 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4016,6 +4016,22 @@ def write(self, data): t.write("x"*chunk_size) self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack) + def test_issue119506(self): + chunk_size = 8192 + + class MockIO(self.MockRawIO): + def write(self, data): + t.write("efg") + return super().write(data) + + buf = MockIO() + t = self.TextIOWrapper(buf) + t.write("a" * (chunk_size - 1)) + t.write("bcd") + t.flush() + + self.assertEqual([b"a" * (chunk_size - 1), b"bcdefg"], buf._write_stack) + class PyTextIOWrapperTest(TextIOWrapperTest): io = pyio diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 9dff8eafb2560f..b53561105b6a6f 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1719,16 +1719,23 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) bytes_len = PyBytes_GET_SIZE(b); } - if (self->pending_bytes == NULL) { - self->pending_bytes_count = 0; - self->pending_bytes = b; - } - else if (self->pending_bytes_count + bytes_len > self->chunk_size) { + if (self->pending_bytes_count + bytes_len > self->chunk_size) { // Prevent to concatenate more than chunk_size data. if (_textiowrapper_writeflush(self) < 0) { Py_DECREF(b); return NULL; } + if (self->pending_bytes) { + // gh-119506: call to _textiowrapper_writeflush() + // can write new data to pending_bytes + PyObject *tmp = self->pending_bytes; + self->pending_bytes = b; + b = tmp; + } + } + + if (self->pending_bytes == NULL) { + self->pending_bytes_count = 0; self->pending_bytes = b; } else if (!PyList_CheckExact(self->pending_bytes)) { From f8fd65936f9f00cbc8d451761e28e27ae4222e05 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 24 May 2024 14:32:29 +0000 Subject: [PATCH 02/13] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst new file mode 100644 index 00000000000000..c4cd588b4fa194 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst @@ -0,0 +1 @@ +Fix bug in :meth:`io.TextIOWrapper.write`, which caused its internal buffer to be rewritten during flushing of large data. From 4809c1abb9212b3049bf861989a0823e56343da6 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov <52372310+chgnrdv@users.noreply.github.com> Date: Fri, 24 May 2024 17:34:04 +0300 Subject: [PATCH 03/13] Update 2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst --- .../next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst index c4cd588b4fa194..5088c840134933 100644 --- a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst +++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst @@ -1 +1 @@ -Fix bug in :meth:`io.TextIOWrapper.write`, which caused its internal buffer to be rewritten during flushing of large data. +Fix bug in :meth:`io.TextIOWrapper.write`, which caused :class:`io.TextIOWrapper` internal buffer to be rewritten during flushing of large data. From 2e72b28e624703d67df5b74a8cdc070d625a60e3 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov <52372310+chgnrdv@users.noreply.github.com> Date: Fri, 24 May 2024 17:43:13 +0300 Subject: [PATCH 04/13] Update 2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst --- .../next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst index 5088c840134933..550420d0619640 100644 --- a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst +++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst @@ -1 +1 @@ -Fix bug in :meth:`io.TextIOWrapper.write`, which caused :class:`io.TextIOWrapper` internal buffer to be rewritten during flushing of large data. +Fix bug in :meth:`~io.TextIOWrapper.write` method of :class:`io.TextIOWrapper`, which causes its internal buffer to be rewritten during flushing of large data. From 43628cec598c3cfce72990035e2e3075d7a92d4c Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Fri, 24 May 2024 19:23:40 +0300 Subject: [PATCH 05/13] refactor --- Modules/_io/textio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index b53561105b6a6f..7697365efc03cf 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1719,6 +1719,8 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) bytes_len = PyBytes_GET_SIZE(b); } + PyObject *pending = self->pending_bytes; + if (self->pending_bytes_count + bytes_len > self->chunk_size) { // Prevent to concatenate more than chunk_size data. if (_textiowrapper_writeflush(self) < 0) { @@ -1728,9 +1730,9 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) if (self->pending_bytes) { // gh-119506: call to _textiowrapper_writeflush() // can write new data to pending_bytes - PyObject *tmp = self->pending_bytes; + pending = self->pending_bytes; self->pending_bytes = b; - b = tmp; + b = pending; } } @@ -1741,6 +1743,7 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) else if (!PyList_CheckExact(self->pending_bytes)) { PyObject *list = PyList_New(2); if (list == NULL) { + self->pending_bytes = pending; Py_DECREF(b); return NULL; } @@ -1750,6 +1753,7 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) } else { if (PyList_Append(self->pending_bytes, b) < 0) { + self->pending_bytes = pending; Py_DECREF(b); return NULL; } From a3fe68e41d99b7f39fc649a381bdb2ff73547162 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov <52372310+chgnrdv@users.noreply.github.com> Date: Fri, 24 May 2024 19:27:14 +0300 Subject: [PATCH 06/13] Update 2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst --- .../next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst index 550420d0619640..dc2928b13076c6 100644 --- a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst +++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst @@ -1 +1 @@ -Fix bug in :meth:`~io.TextIOWrapper.write` method of :class:`io.TextIOWrapper`, which causes its internal buffer to be rewritten during flushing of large data. +Fix bug in :meth:`!io.TextIOWrapper.write` method of :class:`io.TextIOWrapper`, which causes its internal buffer to be rewritten during flushing of large data. From c246d7f6b626ca645690477293d16f819f94e2c1 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov <52372310+chgnrdv@users.noreply.github.com> Date: Fri, 24 May 2024 21:41:02 +0300 Subject: [PATCH 07/13] Update 2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst --- .../next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst index dc2928b13076c6..b2006ba7968d24 100644 --- a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst +++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst @@ -1 +1 @@ -Fix bug in :meth:`!io.TextIOWrapper.write` method of :class:`io.TextIOWrapper`, which causes its internal buffer to be rewritten during flushing of large data. +Fix bug in :meth:`!io.TextIOWrapper.write` method, which causes wrapper's internal buffer to be rewritten during flushing of large data. From 156940d71cb4e61f423e6bd9b31eb19df2b92abf Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 1 Jun 2024 01:26:02 +0900 Subject: [PATCH 08/13] fixup --- Modules/_io/textio.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 7697365efc03cf..02aa624a6aa2a8 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1719,21 +1719,15 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) bytes_len = PyBytes_GET_SIZE(b); } - PyObject *pending = self->pending_bytes; - - if (self->pending_bytes_count + bytes_len > self->chunk_size) { - // Prevent to concatenate more than chunk_size data. + // Prevent to concatenate more than chunk_size data. + while (self->pending_bytes && + bytes_len + self->pending_bytes_count > self->chunk_size) { if (_textiowrapper_writeflush(self) < 0) { Py_DECREF(b); return NULL; } - if (self->pending_bytes) { - // gh-119506: call to _textiowrapper_writeflush() - // can write new data to pending_bytes - pending = self->pending_bytes; - self->pending_bytes = b; - b = pending; - } + // _textiowrapper_writeflush() releases GIL during write. + // self->pending_bytes and self->pending_bytes_count may be changed now. } if (self->pending_bytes == NULL) { @@ -1743,7 +1737,6 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) else if (!PyList_CheckExact(self->pending_bytes)) { PyObject *list = PyList_New(2); if (list == NULL) { - self->pending_bytes = pending; Py_DECREF(b); return NULL; } @@ -1753,7 +1746,6 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) } else { if (PyList_Append(self->pending_bytes, b) < 0) { - self->pending_bytes = pending; Py_DECREF(b); return NULL; } From 2392d2087ba0dcb5d19bfc81747f4d77259321d5 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 1 Jun 2024 01:40:05 +0900 Subject: [PATCH 09/13] fix test --- Lib/test/test_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index c6910990b3e810..65fa572d5a85ab 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4030,7 +4030,7 @@ def write(self, data): t.write("bcd") t.flush() - self.assertEqual([b"a" * (chunk_size - 1), b"bcdefg"], buf._write_stack) + self.assertEqual([b"a" * (chunk_size - 1), b"efgbcd"], buf._write_stack) class PyTextIOWrapperTest(TextIOWrapperTest): From 02cf53af3adb7852e8f9d11fc6169a80e6b8d34b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 1 Jun 2024 12:26:55 +0900 Subject: [PATCH 10/13] add assert --- Modules/_io/textio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 02aa624a6aa2a8..5a9b70724de862 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1740,6 +1740,9 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) Py_DECREF(b); return NULL; } + // Since Python 3.12, allocating GC object won't trigger GC and release + // GIL. See https://github.com/python/cpython/issues/97922 + assert(!PyList_CheckExact(self->pending_bytes)); PyList_SET_ITEM(list, 0, self->pending_bytes); PyList_SET_ITEM(list, 1, b); self->pending_bytes = list; From 483975c61d0f5eadf99f3f3417ac0bbab948efee Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 1 Jun 2024 13:00:37 +0900 Subject: [PATCH 11/13] simplify code and write more comments --- Modules/_io/textio.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 5a9b70724de862..c162d8106ec1fd 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1719,19 +1719,26 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) bytes_len = PyBytes_GET_SIZE(b); } - // Prevent to concatenate more than chunk_size data. - while (self->pending_bytes && - bytes_len + self->pending_bytes_count > self->chunk_size) { - if (_textiowrapper_writeflush(self) < 0) { - Py_DECREF(b); - return NULL; + // We should avoid concatinating huge data. + // Flush the buffer before adding b to the buffer if b is not small. + // https://github.com/python/cpython/issues/87426 + if (bytes_len >= self->chunk_size) { + // _textiowrapper_writeflush() calls buffer.write(). + // self->pending_bytes can be appended during buffer->write() + // or other thread. + // We need to loop until buffer becomes empty. + // https://github.com/python/cpython/issues/118138 + // https://github.com/python/cpython/issues/119506 + while (self->pending_bytes != NULL) { + if (_textiowrapper_writeflush(self) < 0) { + Py_DECREF(b); + return NULL; + } } - // _textiowrapper_writeflush() releases GIL during write. - // self->pending_bytes and self->pending_bytes_count may be changed now. } if (self->pending_bytes == NULL) { - self->pending_bytes_count = 0; + assert(self->pending_bytes_count == 0); self->pending_bytes = b; } else if (!PyList_CheckExact(self->pending_bytes)) { From 01327f709e3dd4f00021fdaebc5451658c8009c1 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 1 Jun 2024 13:35:21 +0900 Subject: [PATCH 12/13] update test --- Lib/test/test_io.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 65fa572d5a85ab..1ca3edac8c8dc9 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4020,17 +4020,23 @@ def test_issue119506(self): chunk_size = 8192 class MockIO(self.MockRawIO): + written = False def write(self, data): - t.write("efg") + if not self.written: + self.written = True + t.write("middle") return super().write(data) buf = MockIO() t = self.TextIOWrapper(buf) - t.write("a" * (chunk_size - 1)) - t.write("bcd") + t.write("abc") + t.write("def") + # writing data which size >= chunk_size cause flushing buffer before write. + t.write("g" * chunk_size) t.flush() - self.assertEqual([b"a" * (chunk_size - 1), b"efgbcd"], buf._write_stack) + self.assertEqual([b"abcdef", b"middle", b"g"*chunk_size], + buf._write_stack) class PyTextIOWrapperTest(TextIOWrapperTest): From 26eadbe2637e695a9cfb1c8945c47800819efb12 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 3 Jun 2024 14:27:40 +0900 Subject: [PATCH 13/13] update changelog --- .../next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst index b2006ba7968d24..f9b764ae0c49b3 100644 --- a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst +++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst @@ -1 +1 @@ -Fix bug in :meth:`!io.TextIOWrapper.write` method, which causes wrapper's internal buffer to be rewritten during flushing of large data. +Fix :meth:`!io.TextIOWrapper.write` method breaks internal buffer when the method is called again during flushing internal buffer.