8000 gh-129205: Add os.readinto API for reading data into a caller provided buffer by cmaloney · Pull Request #129211 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-129205: Add os.readinto API for reading data into a caller provided buffer #129211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Jan 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions Doc/library/os.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1659,6 +1659,33 @@ or `the MSDN <https://msdn.microsoft.com/en-us/library/z0kc8e3z.aspx>`_ on Windo
:exc:`InterruptedError` exception (see :pep:`475` for the rationale).


.. function:: readinto(fd, buffer, /)

Read from a file descriptor *fd* into a mutable
:ref:`buffer object <bufferobjects>` *buffer*.

The *buffer* should be mutable and :term:`bytes-like <bytes-like object>`. On
success, returns the number of bytes read. Less bytes may be read than the
size of the buffer. The underlying system call will be retried when
interrupted by a signal, unless the signal handler raises an exception.
Other errors will not be retried and an error will be raised.

Returns 0 if *fd* is at end of file or if the provided *buffer* has
length 0 (which can be used to check for errors without reading data).
Never returns negative.

.. note::

This function is intended for low-level I/O and must be applied to a file
descriptor as returned by :func:`os.open` or :func:`os.pipe`. To read a
"file object" returned by the built-in function :func:`open`, or
:data:`sys.stdin`, use its member functions, for example
:meth:`io.BufferedIOBase.readinto`, :meth:`io.BufferedIOBase.read`, or
:meth:`io.TextIOBase.read`

.. versionadded:: next


.. function:: sendfile(out_fd, in_fd, offset, count)
sendfile(out_fd, in_fd, offset, count, headers=(), trailers=(), flags=0)

Expand Down
4 changes: 4 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,10 @@ os
to the :mod:`os` module.
(Contributed by James Roy in :gh:`127688`.)

* Add the :func:`os.readinto` function to read into a
:ref:`buffer object <bufferobjects>` from a file descriptor.
(Contributed by Cody Maloney in :gh:`129205`.)


pathlib
-------
Expand Down
31 changes: 31 additions & 0 deletions Lib/test/_test_eintr.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,37 @@ def test_read(self):
self.assertEqual(data, os.read(rd, len(data)))
self.assertEqual(proc.wait(), 0)

def test_readinto(self):
rd, wr = os.pipe()
self.addCleanup(os.close, rd)
# wr closed explicitly by parent

# the payload below are smaller than PIPE_BUF, hence the writes will be
# atomic
datas = [b"hello", b"world", b"spam"]

code = '\n'.join((
'import os, sys, time',
'',
'wr = int(sys.argv[1])',
'datas = %r' % datas,
'sleep_time = %r' % self.sleep_time,
'',
'for data in datas:',
' # let the parent block on read()',
' time.sleep(sleep_time)',
' os.write(wr, data)',
))

proc = self.subprocess(code, str(wr), pass_fds=[wr])
with kill_on_error(proc):
os.close(wr)
for data in datas:
buffer = bytearray(len(data))
self.assertEqual(os.readinto(rd, buffer), len(data))
self.assertEqual(buffer, data)
self.assertEqual(proc.wait(), 0)

def test_write(self):
rd, wr = os.pipe()
self.addCleanup(os.close, wr)
Expand Down
114 changes: 114 additions & 0 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,93 @@ def test_read(self):
self.assertEqual(type(s), bytes)
self.assertEqual(s, b"spam")

def test_readinto(self):
with open(os_helper.TESTFN, "w+b") as fobj:
fobj.write(b"spam")
fobj.flush()
fd = fobj.fileno()
os.lseek(fd, 0, 0)
# Oversized so readinto without hitting end.
buffer = bytearray(7)
s = os.readinto(fd, buffer)
self.assertEqual(type(s), int)
self.assertEqual(s, 4)
# Should overwrite the first 4 bytes of the buffer.
self.assertEqual(buffer[:4], b"spam")

# Readinto at EOF should return 0 and not touch buffer.
buffer[:] = b"notspam"
s = os.readinto(fd, buffer)
self.assertEqual(type(s), int)
self.assertEqual(s, 0)
self.assertEqual(bytes(buffer), b"notspam")
s = os.readinto(fd, buffer)
self.assertEqual(s, 0)
self.assertEqual(bytes(buffer), b"notspam")

# Readinto a 0 length bytearray when at EOF should return 0
self.assertEqual(os.readinto(fd, bytearray()), 0)

# Readinto a 0 length bytearray with data available should return 0.
os.lseek(fd, 0, 0)
self.assertEqual(os.readinto(fd, bytearray()), 0)

@unittest.skipUnless(hasattr(os, 'get_blocking'),
'needs os.get_blocking() and os.set_blocking()')
@unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()")
def test_readinto_non_blocking(self):
# Verify behavior of a readinto which would block on a non-blocking fd.
r, w = os.pipe()
try:
os.set_blocking(r, False)
with self.assertRaises(BlockingIOError):
os.readinto(r, bytearray(5))

# Pass some data through
os.write(w, b"spam")
self.assertEqual(os.readinto(r, bytearray(4)), 4)

# Still don't block or return 0.
with self.assertRaises(BlockingIOError):
os.readinto(r, bytearray(5))

# At EOF should return size 0
os.close(w)
w = None
self.assertEqual(os.readinto(r, bytearray(5)), 0)
self.assertEqual(os.readinto(r, bytearray(5)), 0) # Still EOF

finally:
os.close(r)
if w is not None:
os.close(w)

def test_readinto_badarg(self):
with open(os_helper.TESTFN, "w+b") as fobj:
fobj.write(b"spam")
fobj.flush()
fd = fobj.fileno()
os.lseek(fd, 0, 0)

for bad_arg in ("test", bytes(), 14):
with self.subTest(f"bad buffer {type(bad_arg)}"):
with self.assertRaises(TypeError):
os.readinto(fd, bad_arg)

with self.subTest("doesn't work on file objects"):
with self.assertRaises(TypeError):
os.readinto(fobj, bytearray(5))

# takes two args
with self.assertRaises(TypeError):
os.readinto(fd)

# No data should have been read with the bad arguments.
buffer = bytearray(4)
s = os.readinto(fd, buffer)
self.assertEqual(s, 4)
self.assertEqual(buffer, b"spam")

@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
# Py_ssize_t type
Expand All @@ -249,6 +336,29 @@ def test_large_read(self, size):
# operating system is free to return less bytes than requested.
self.assertEqual(data, b'test')


@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
# Py_ssize_t type
@unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX,
"needs INT_MAX < PY_SSIZE_T_MAX")
@support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False)
def test_large_readinto(self, size):
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
create_file(os_helper.TESTFN, b'test')

# Issue #21932: For readinto the buffer contains the length rather than
# a length being passed explicitly to read, should still get capped to a
# valid size / not raise an OverflowError for sizes larger than INT_MAX.
buffer = bytearray(INT_MAX + 10)
with open(os_helper.TESTFN, "rb") as fp:
length = os.readinto(fp.fileno(), buffer)

# The test does not try to read more than 2 GiB at once because the
# operating system is free to return less bytes than requested.
self.assertEqual(length, 4)
self.assertEqual(buffer[:4], b'test')

def test_write(self):
# os.write() accepts bytes- and buffer-like objects but not strings
fd = os.open(os_helper.TESTFN, os.O_CREAT | os.O_WRONLY)
Expand Down Expand Up @@ -2467,6 +2577,10 @@ def test_lseek(self):
def test_read(self):
self.check(os.read, 1)

@unittest.skipUnless(hasattr(os, 'readinto'), 'test needs os.readinto()')
def test_readinto(self):
self.check(os.readinto, bytearray(5))

@unittest.skipUnless(hasattr(os, 'readv'), 'test needs os.readv()')
def test_readv(self):
buf = bytearray(10)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add :func:`os.readinto` to read into a :ref:`buffer object <bufferobjects>` from a file descriptor.
58 changes: 57 additions & 1 deletion Modules/clinic/posixmodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions Modules/posixmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -11433,6 +11433,38 @@ os_read_impl(PyObject *module, int fd, Py_ssize_t length)
return buffer;
}

/*[clinic input]
os.readinto -> Py_ssize_t
fd: int
buffer: Py_buffer(accept={rwbuffer})
/

Read into a buffer object from a file descriptor.

The buffer should be mutable and bytes-like. On success, returns the number of
bytes read. Less bytes may be read than the size of the buffer. The underlying
system call will be retried when interrupted by a signal, unless the signal
handler raises an exception. Other errors will not be retried and an error will
be raised.

Returns 0 if *fd* is at end of file or if the provided *buffer* has length 0
(which can be used to check for errors without reading data). Never returns
negative.
[clinic start generated code]*/

static Py_ssize_t
os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer)
/*[clinic end generated code: output=8091a3513c683a80 input=d40074d0a68de575]*/
{
assert(buffer->len >= 0);
Py_ssize_t result = _Py_read(fd, buffer->buf, buffer->len);
/* Ensure negative is never returned without an error. Simplifies calling
code. _Py_read should succeed, possibly reading 0 bytes, _or_ set an
error. */
assert(result >= 0 || (result == -1 && PyErr_Occurred()));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: Clinic wrapping code takes -1 return + PyErr_Occurred to indicate error and will return a NULL PyObject* for the function. So should never get back a PyLong object which is negative.

return result;
}

#if (defined(HAVE_SENDFILE) && (defined(__FreeBSD__) || defined(__DragonFly__) \
|| defined(__APPLE__))) \
|| defined(HAVE_READV) || defined(HAVE_PREADV) || defined (HAVE_PREADV2) \
Expand Down Expand Up @@ -16973,6 +17005,7 @@ static PyMethodDef posix_methods[] = {
OS_LOCKF_METHODDEF
OS_LSEEK_METHODDEF
OS_READ_METHODDEF
OS_READINTO_METHODDEF
OS_READV_METHODDEF
OS_PREAD_METHODDEF
OS_PREADV_METHODDEF
Expand Down
Loading
0