diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 2445b008eb5a75..7d3596622862ea 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1659,6 +1659,33 @@ or `the MSDN `_ on Windo :exc:`InterruptedError` exception (see :pep:`475` for the rationale). +.. function:: readinto(fd, buffer, /) + + Read from a file descriptor *fd* into a mutable + :ref:`buffer object <bufferobjects>` *buffer*. + + The *buffer* should be mutable and :term:`bytes-like <bytes-like object>`. On + success, returns the number of bytes read. Fewer bytes may be read than the + size of the buffer. The underlying system call will be retried when + interrupted by a signal, unless the signal handler raises an exception. + Other errors will not be retried and an error will be raised. + + Returns 0 if *fd* is at end of file or if the provided *buffer* has + length 0 (which can be used to check for errors without reading data). + Never returns a negative value. + + .. note:: + + This function is intended for low-level I/O and must be applied to a file + descriptor as returned by :func:`os.open` or :func:`os.pipe`. To read a + "file object" returned by the built-in function :func:`open`, or + :data:`sys.stdin`, use its member functions, for example + :meth:`io.BufferedIOBase.readinto`, :meth:`io.BufferedIOBase.read`, or + :meth:`io.TextIOBase.read`. + + .. versionadded:: next + + .. function:: sendfile(out_fd, in_fd, offset, count) sendfile(out_fd, in_fd, offset, count, headers=(), trailers=(), flags=0) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 531c5ed6226fe4..374587489bdb62 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -561,6 +561,10 @@ os to the :mod:`os` module. (Contributed by James Roy in :gh:`127688`.) +* Add the :func:`os.readinto` function to read into a + :ref:`buffer object <bufferobjects>` from a file descriptor. + (Contributed by Cody Maloney in :gh:`129205`.) 
+ pathlib ------- diff --git a/Lib/test/_test_eintr.py b/Lib/test/_test_eintr.py index 493932d6c6d441..ae799808ca067e 100644 --- a/Lib/test/_test_eintr.py +++ b/Lib/test/_test_eintr.py @@ -152,6 +152,37 @@ def test_read(self): self.assertEqual(data, os.read(rd, len(data))) self.assertEqual(proc.wait(), 0) + def test_readinto(self): + rd, wr = os.pipe() + self.addCleanup(os.close, rd) + # wr closed explicitly by parent + + # the payload below are smaller than PIPE_BUF, hence the writes will be + # atomic + datas = [b"hello", b"world", b"spam"] + + code = '\n'.join(( + 'import os, sys, time', + '', + 'wr = int(sys.argv[1])', + 'datas = %r' % datas, + 'sleep_time = %r' % self.sleep_time, + '', + 'for data in datas:', + ' # let the parent block on read()', + ' time.sleep(sleep_time)', + ' os.write(wr, data)', + )) + + proc = self.subprocess(code, str(wr), pass_fds=[wr]) + with kill_on_error(proc): + os.close(wr) + for data in datas: + buffer = bytearray(len(data)) + self.assertEqual(os.readinto(rd, buffer), len(data)) + self.assertEqual(buffer, data) + self.assertEqual(proc.wait(), 0) + def test_write(self): rd, wr = os.pipe() self.addCleanup(os.close, wr) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index d2c4dff3c9a0e5..d1bdf784b0df16 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -230,6 +230,93 @@ def test_read(self): self.assertEqual(type(s), bytes) self.assertEqual(s, b"spam") + def test_readinto(self): + with open(os_helper.TESTFN, "w+b") as fobj: + fobj.write(b"spam") + fobj.flush() + fd = fobj.fileno() + os.lseek(fd, 0, 0) + # Oversized so readinto without hitting end. + buffer = bytearray(7) + s = os.readinto(fd, buffer) + self.assertEqual(type(s), int) + self.assertEqual(s, 4) + # Should overwrite the first 4 bytes of the buffer. + self.assertEqual(buffer[:4], b"spam") + + # Readinto at EOF should return 0 and not touch buffer. 
+ buffer[:] = b"notspam" + s = os.readinto(fd, buffer) + self.assertEqual(type(s), int) + self.assertEqual(s, 0) + self.assertEqual(bytes(buffer), b"notspam") + s = os.readinto(fd, buffer) + self.assertEqual(s, 0) + self.assertEqual(bytes(buffer), b"notspam") + + # Readinto a 0 length bytearray when at EOF should return 0 + self.assertEqual(os.readinto(fd, bytearray()), 0) + + # Readinto a 0 length bytearray with data available should return 0. + os.lseek(fd, 0, 0) + self.assertEqual(os.readinto(fd, bytearray()), 0) + + @unittest.skipUnless(hasattr(os, 'get_blocking'), + 'needs os.get_blocking() and os.set_blocking()') + @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") + def test_readinto_non_blocking(self): + # Verify behavior of a readinto which would block on a non-blocking fd. + r, w = os.pipe() + try: + os.set_blocking(r, False) + with self.assertRaises(BlockingIOError): + os.readinto(r, bytearray(5)) + + # Pass some data through + os.write(w, b"spam") + self.assertEqual(os.readinto(r, bytearray(4)), 4) + + # Still don't block or return 0. + with self.assertRaises(BlockingIOError): + os.readinto(r, bytearray(5)) + + # At EOF should return size 0 + os.close(w) + w = None + self.assertEqual(os.readinto(r, bytearray(5)), 0) + self.assertEqual(os.readinto(r, bytearray(5)), 0) # Still EOF + + finally: + os.close(r) + if w is not None: + os.close(w) + + def test_readinto_badarg(self): + with open(os_helper.TESTFN, "w+b") as fobj: + fobj.write(b"spam") + fobj.flush() + fd = fobj.fileno() + os.lseek(fd, 0, 0) + + for bad_arg in ("test", bytes(), 14): + with self.subTest(f"bad buffer {type(bad_arg)}"): + with self.assertRaises(TypeError): + os.readinto(fd, bad_arg) + + with self.subTest("doesn't work on file objects"): + with self.assertRaises(TypeError): + os.readinto(fobj, bytearray(5)) + + # takes two args + with self.assertRaises(TypeError): + os.readinto(fd) + + # No data should have been read with the bad arguments. 
+ buffer = bytearray(4) + s = os.readinto(fd, buffer) + self.assertEqual(s, 4) + self.assertEqual(buffer, b"spam") + @support.cpython_only # Skip the test on 32-bit platforms: the number of bytes must fit in a # Py_ssize_t type @@ -249,6 +336,29 @@ def test_large_read(self, size): # operating system is free to return less bytes than requested. self.assertEqual(data, b'test') + + @support.cpython_only + # Skip the test on 32-bit platforms: the number of bytes must fit in a + # Py_ssize_t type + @unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX, + "needs INT_MAX < PY_SSIZE_T_MAX") + @support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False) + def test_large_readinto(self, size): + self.addCleanup(os_helper.unlink, os_helper.TESTFN) + create_file(os_helper.TESTFN, b'test') + + # Issue #21932: For readinto the buffer contains the length rather than + # a length being passed explicitly to read, should still get capped to a + # valid size / not raise an OverflowError for sizes larger than INT_MAX. + buffer = bytearray(INT_MAX + 10) + with open(os_helper.TESTFN, "rb") as fp: + length = os.readinto(fp.fileno(), buffer) + + # The test does not try to read more than 2 GiB at once because the + # operating system is free to return less bytes than requested. 
+ self.assertEqual(length, 4) + self.assertEqual(buffer[:4], b'test') + def test_write(self): # os.write() accepts bytes- and buffer-like objects but not strings fd = os.open(os_helper.TESTFN, os.O_CREAT | os.O_WRONLY) @@ -2467,6 +2577,10 @@ def test_lseek(self): def test_read(self): self.check(os.read, 1) + @unittest.skipUnless(hasattr(os, 'readinto'), 'test needs os.readinto()') + def test_readinto(self): + self.check(os.readinto, bytearray(5)) + @unittest.skipUnless(hasattr(os, 'readv'), 'test needs os.readv()') def test_readv(self): buf = bytearray(10) diff --git a/Misc/NEWS.d/next/Library/2025-01-22-16-54-25.gh-issue-129205.FMqrUt.rst b/Misc/NEWS.d/next/Library/2025-01-22-16-54-25.gh-issue-129205.FMqrUt.rst new file mode 100644 index 00000000000000..c4ed76408f32f6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-22-16-54-25.gh-issue-129205.FMqrUt.rst @@ -0,0 +1 @@ +Add :func:`os.readinto` to read into a :ref:`buffer object <bufferobjects>` from a file descriptor. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 96bf21dced92f0..abeb9c3e3e12b1 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -7577,6 +7577,62 @@ os_read(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(os_readinto__doc__, +"readinto($module, fd, buffer, /)\n" +"--\n" +"\n" +"Read into a buffer object from a file descriptor.\n" +"\n" +"The buffer should be mutable and bytes-like. On success, returns the number of\n" +"bytes read. Less bytes may be read than the size of the buffer. The underlying\n" +"system call will be retried when interrupted by a signal, unless the signal\n" +"handler raises an exception. Other errors will not be retried and an error will\n" +"be raised.\n" +"\n" +"Returns 0 if *fd* is at end of file or if the provided *buffer* has length 0\n" +"(which can be used to check for errors without reading data). 
Never returns\n" +"negative."); + +#define OS_READINTO_METHODDEF \ + {"readinto", _PyCFunction_CAST(os_readinto), METH_FASTCALL, os_readinto__doc__}, + +static Py_ssize_t +os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer); + +static PyObject * +os_readinto(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + int fd; + Py_buffer buffer = {NULL, NULL}; + Py_ssize_t _return_value; + + if (!_PyArg_CheckPositional("readinto", nargs, 2, 2)) { + goto exit; + } + fd = PyLong_AsInt(args[0]); + if (fd == -1 && PyErr_Occurred()) { + goto exit; + } + if (PyObject_GetBuffer(args[1], &buffer, PyBUF_WRITABLE) < 0) { + _PyArg_BadArgument("readinto", "argument 2", "read-write bytes-like object", args[1]); + goto exit; + } + _return_value = os_readinto_impl(module, fd, &buffer); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + /* Cleanup for buffer */ + if (buffer.obj) { + PyBuffer_Release(&buffer); + } + + return return_value; +} + #if defined(HAVE_READV) PyDoc_STRVAR(os_readv__doc__, @@ -13140,4 +13196,4 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF #define OS__EMSCRIPTEN_DEBUGGER_METHODDEF #endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */ -/*[clinic end generated code: output=34cb96bd07bcef90 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8318c26fc2cd236c input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index fb9e55a57703fc..a35a848a7ca4b8 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -11433,6 +11433,38 @@ os_read_impl(PyObject *module, int fd, Py_ssize_t length) return buffer; } +/*[clinic input] +os.readinto -> Py_ssize_t + fd: int + buffer: Py_buffer(accept={rwbuffer}) + / + +Read into a buffer object from a file descriptor. + +The buffer should be mutable and bytes-like. 
On success, returns the number of +bytes read. Less bytes may be read than the size of the buffer. The underlying +system call will be retried when interrupted by a signal, unless the signal +handler raises an exception. Other errors will not be retried and an error will +be raised. + +Returns 0 if *fd* is at end of file or if the provided *buffer* has length 0 +(which can be used to check for errors without reading data). Never returns +negative. +[clinic start generated code]*/ + +static Py_ssize_t +os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer) +/*[clinic end generated code: output=8091a3513c683a80 input=d40074d0a68de575]*/ +{ + assert(buffer->len >= 0); + Py_ssize_t result = _Py_read(fd, buffer->buf, buffer->len); + /* Ensure negative is never returned without an error. Simplifies calling + code. _Py_read should succeed, possibly reading 0 bytes, _or_ set an + error. */ + assert(result >= 0 || (result == -1 && PyErr_Occurred())); + return result; +} + #if (defined(HAVE_SENDFILE) && (defined(__FreeBSD__) || defined(__DragonFly__) \ || defined(__APPLE__))) \ || defined(HAVE_READV) || defined(HAVE_PREADV) || defined (HAVE_PREADV2) \ @@ -16973,6 +17005,7 @@ static PyMethodDef posix_methods[] = { OS_LOCKF_METHODDEF OS_LSEEK_METHODDEF OS_READ_METHODDEF + OS_READINTO_METHODDEF OS_READV_METHODDEF OS_PREAD_METHODDEF OS_PREADV_METHODDEF