From e3502910899112eb9e96aeb6ca4930e6e3640e8a Mon Sep 17 00:00:00 2001
From: Cody Maloney <cmaloney@theoreticalchaos.com>
Date: Sat, 8 Feb 2025 16:07:46 -0800
Subject: [PATCH 1/8] WIP: Pyio readfrom

---
 .../pycore_global_objects_fini_generated.h    |   1 +
 Include/internal/pycore_global_strings.h      |   1 +
 .../internal/pycore_runtime_init_generated.h  |   1 +
 .../internal/pycore_unicodeobject_generated.h |   4 +
 Lib/_pyio.py                                  | 111 +++++++---
 Lib/subprocess.py                             |   9 +-
 Modules/_io/bytesio.c                         | 203 ++++++++++++++++++
 Modules/_io/clinic/bytesio.c.h                | 103 ++++++++-
 8 files changed, 402 insertions(+), 31 deletions(-)

diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 90214a314031d1..b07db953e2c6bb 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -916,6 +916,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(entrypoint));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(env));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(errors));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(estimate));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(event));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eventmask));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_type));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 97a75d0c46c867..a93cc79bda63c6 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -405,6 +405,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(entrypoint)
         STRUCT_FOR_ID(env)
         STRUCT_FOR_ID(errors)
+        STRUCT_FOR_ID(estimate)
         STRUCT_FOR_ID(event)
         STRUCT_FOR_ID(eventmask)
         STRUCT_FOR_ID(exc_type)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 4f928cc050bf8e..81bd0aefc668ee 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -914,6 +914,7 @@ extern "C" {
     INIT_ID(entrypoint), \
     INIT_ID(env), \
     INIT_ID(errors), \
+    INIT_ID(estimate), \
     INIT_ID(event), \
     INIT_ID(eventmask), \
     INIT_ID(exc_type), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 5b78d038fc1192..cad8f2731fc222 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1416,6 +1416,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(estimate);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(event);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index f7370dff19efc8..4ebed101d688e8 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -926,6 +926,83 @@ def read1(self, size=-1):
         """
         return self.read(size)
 
+    def readfrom(self, file, /, *, estimate=None, limit=None):
+        """Efficiently read from the provided file and return True if hit end.
+
+        Returns True if and only if a read into a non-zero length buffer
+        returns 0 bytes. On most systems this indicates end of file / stream.
+        """
+        if self.closed:
+            raise ValueError("read from closed file")
+
+        # In order to detect end of file, need a read() of at least 1
+        # byte which returns size 0. Oversize the buffer by 1 byte so the
+        # I/O can be completed with two read() calls (one for all data, one
+        # for EOF) without needing to resize the buffer.
+        # FIXME(cmaloney): This should probably be a memoryview....
+        if estimate is not None:
+            estimate = int(estimate) + 1
+
+        # Cap to limit
+        if limit is not None:
+            limit = int(limit)
+            if limit <= 0:
+                raise ValueError(f"limit must be larger than 0, got {limit}")
+
+            if target_read is not None:
+                target_read = min(target_read, limit)
+
+        if self._pos - len(self._buffer) < estimate:
+            self._buffer.resize(self._pos + target_read)
+
+        # FIXME(cmaloney): Expand buffer if needed
+        start_pos = self._pos
+        try:
+            while True:
+                bytes_read = self._pos - start_pos
+                if limit is not None and limit <= bytes_read:
+                    return False
+
+                if target_read <= 0:
+                    # FIXME(cmaloney): Check this matces
+                    self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read))
+
+                if limit is not None and bytes_read >= limit:
+                    return False
+
+                # Make sure there is space for the read.
+                if target_read :
+
+
+                # Cap target read
+                # Hit cap, not EOF.
+                bytes_read = self._pos - start_pos
+                if bytes_read >= cap:
+                    return False
+
+                read_size = len(self._buffer) - self._pos
+
+                # Calculate next read size.
+                if self._pos >= len(self._buffer):
+                    self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read))
+
+                if read_size <= 0:
+                    # Fill remaining buffer, but never read more than cap.
+                    read_size = len(self._buffer) - self._pos
+                    read_size = min(start_pos - self, cap - bytes_read)
+
+                n = os.readinto(file, memoryview(self._buffer)[self._pos:])
+                self._pos += n
+                bytes_read += n
+                read_size -= n
+                if read_size <= 0:
+                    read_size = _new_buffersize(bytes_read)
+            assert len(result) - bytes_read >= 1, \
+                "os.readinto buffer size 0 will result in erroneous EOF / returns 0"
+        except BlockingIOError:
+            if not bytes_read:
+                return None
+
     def write(self, b):
         if self.closed:
             raise ValueError("write to closed file")
@@ -1666,38 +1743,24 @@ def readall(self):
         """
         self._checkClosed()
         self._checkReadable()
-        if self._stat_atopen is None or self._stat_atopen.st_size <= 0:
-            bufsize = DEFAULT_BUFFER_SIZE
-        else:
-            # In order to detect end of file, need a read() of at least 1
-            # byte which returns size 0. Oversize the buffer by 1 byte so the
-            # I/O can be completed with two read() calls (one for all data, one
-            # for EOF) without needing to resize the buffer.
-            bufsize = self._stat_atopen.st_size + 1
-
-            if self._stat_atopen.st_size > 65536:
+        estimate = None
+        if self._stat_atopen and self._stat_atopen.st_size >= 0:
+            estimate = self._stat_atopen.st_size
+            if estimate > 65536:
                 try:
                     pos = os.lseek(self._fd, 0, SEEK_CUR)
-                    if self._stat_atopen.st_size >= pos:
-                        bufsize = self._stat_atopen.st_size - pos + 1
+                    estimate = estimate - pos if estimate > pos else 0
                 except OSError:
                     pass
 
-        result = bytearray(bufsize)
-        bytes_read = 0
+        bio = BytesIO()
         try:
-            while n := os.readinto(self._fd, memoryview(result)[bytes_read:]):
-                bytes_read += n
-                if bytes_read >= len(result):
-                    result.resize(_new_buffersize(bytes_read))
+            bio.readfrom(self._fd, estimate=estimate)
+            return bio.getvalue()
         except BlockingIOError:
-            if not bytes_read:
-                return None
+            result = bio.getvalue()
+            return result if result else None
 
-        assert len(result) - bytes_read >= 1, \
-            "os.readinto buffer size 0 will result in erroneous EOF / returns 0"
-        result.resize(bytes_read)
-        return bytes(result)
 
     def readinto(self, buffer):
         """Same as RawIOBase.readinto()."""
diff --git a/Lib/subprocess.py b/Lib/subprocess.py
index 2044d2a42897e9..e0f449b72011dc 100644
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -1921,12 +1921,9 @@ def _execute_child(self, args, executable, preexec_fn, close_fds,
 
                 # Wait for exec to fail or succeed; possibly raising an
                 # exception (limited in size)
-                errpipe_data = bytearray()
-                while True:
-                    part = os.read(errpipe_read, 50000)
-                    errpipe_data += part
-                    if not part or len(errpipe_data) > 50000:
-                        break
+                bio = io.BytesIO()
+                bio.readfrom(errpipe_read, estimate=0, limit=50_000)
+                errpipe_data = bio.getvalue()
             finally:
                 # be sure the FD is closed no matter what
                 os.close(errpipe_read)
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c
index dc4e40b9f09a1d..ae076d1cec7b05 100644
--- a/Modules/_io/bytesio.c
+++ b/Modules/_io/bytesio.c
@@ -5,6 +5,9 @@
 #include <stddef.h>               // offsetof()
 #include "_iomodule.h"
 
+
+#define STACK_BUFER_SIZE    1024
+
 /*[clinic input]
 module _io
 class _io.BytesIO "bytesio *" "clinic_state()->PyBytesIO_Type"
@@ -465,6 +468,205 @@ _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size)
     return _io_BytesIO_read_impl(self, size);
 }
 
+static size_t
+_bytesio_new_buffersize(size_t bytes_read)
+{
+    size_t addend;
+
+    /* Expand the buffer by an amount proportional to the current size,
+       giving us amortized linear-time behavior.  For bigger sizes, use a
+       less-than-double growth factor to avoid excessive allocation. */
+    assert(bytes_read <= PY_SSIZE_T_MAX);
+    if (bytes_read > 65536)
+        addend = bytes_read >> 3;
+    else
+        addend = 256 + bytes_read;
+    if (addend < 8 * 1024)
+        /* Avoid tiny read() calls. */
+        addend = 8 * 1024;
+    return  bytes_read + addend;
+}
+
+/* Read from a fd where there is no data expected to be read.
+This is faster (less allocations, less copies) when there is no data, at the
+expense of slightly slower if there is actual data to read. Falls back to normal
+read loop if more than one buffer of data.
+
+-1 == error, 0 == hit cap or blocked, exit, 1 == hit eof / True return, 2 == read more
+*/
+static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *cap_size) {
+    assert(*cap_size > 0 ** "Must attempt to read at least one byte.");
+    char local_buffer[STACK_BUFER_SIZE];
+    Py_ssize_t read_size = Py_MIN(STACK_BUFER_SIZE, *cap_size);
+    Py_ssize_t result = _Py_read(fd, local_buffer, read_size);
+
+    /* Hit EOF in a single read, return True. */
+    if (result == 0) {
+        return 1;
+    }
+    if (result == -1) {
+        /* BlockingIOError -> return False (didn't find EOF). */
+        if (errno == EAGAIN) {
+            PyErr_Clear();
+            return 0;
+        }
+        return -1;
+    }
+
+    /* Got data, copy across to the buf, then proceed with normal read loop.
+
+    FIXME? The temporary bytes object is an unnecessary copy + allocation.
+        yea: faster / less copies, remove some redundant checks
+        nay: resizing, appending, copying, updating pointers is a lot. */
+    PyObject *bytes = PyBytes_FromStringAndSize(local_buffer, result);
+    if (!bytes) {
+        return -1;
+    }
+    result = write_bytes(self, bytes);
+    Py_DECREF(bytes);
+    if (result < 0) {
+        return -1;
+    }
+    /* Hit cap, nothing left to do. */
+    if (result == *cap_size) {
+        return 0;
+    }
+    *cap_size -= result;
+    return 2;
+}
+
+
+/*[clinic input]
+_io.BytesIO.readfrom -> bool
+    file: int
+    /
+    *
+    estimate: Py_ssize_t(accept={int, NoneType}) = -1
+    limit: Py_ssize_t(accept={int, NoneType}) = -1
+
+Efficiently read from the provided file and return True if hit end of file.
+
+Returns True if and only if a read into a non-zero length buffer returns 0
+bytes. On most systems this indicates end of file / stream.
+
+FIXME?: Allow fileobj that provides readinto.?
+FIXME?:Allow fileobj that only has read?
+
+If a readinto call raises NonBlockingError or returns None, data returned to
+that point will be stored in buffer, and will return False. For other exceptions
+while reading, as much data as possible will be in the buffer.
+
+FIXME: BlockingIOError contains data from partial reads. Append it.
+    -> Include test that no data is lost w/ multiple repeated blocks
+        (There is one already in tests, make sure this is exercised and passes
+         it)
+FIXME: Does this need to document that all reads are Limited to PY_SSIZE_T_MAX.
+FIXME? It would be nice if this could support a timeout, but probably a feature
+       for later.
+[clinic start generated code]*/
+
+static int
+_io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
+                          Py_ssize_t limit)
+/*[clinic end generated code: output=71dcfcf7e9a50527 input=9bce10ea48db6415]*/
+{
+    if (check_closed(self)) {
+        return -1;
+    }
+    if (check_exports(self)) {
+        return -1;
+    }
+    /* Cap all reads to PY_SSIZE_T_MAX */
+    Py_ssize_t cap_size = Py_MIN(Py_MAX(limit, 0), PY_SSIZE_T_MAX);
+    assert(cap_size > 0);
+
+    /* Try and get estimated_size in a single read. */
+    Py_ssize_t read_size = DEFAULT_BUFFER_SIZE;
+    if (estimate > 0) {
+        /* In order to detect end of file, need a read() of at
+            least 1 byte which returns size 0. Oversize the buffer
+            by 1 byte so the I/O can be completed with two read()
+            calls (one for all data, one for EOF) without needing
+            to resize the buffer. */
+        read_size = estimate + ((estimate <= PY_SSIZE_T_MAX - 1) ? 1 : 0);
+    } else if (estimate == 0 || cap_size < STACK_BUFER_SIZE) {
+        /* A number of things in the normal path expect no data, use a small
+           temp buffer for those, only expanding buffer if absolutely needed. */
+        Py_ssize_t result = _bytesio_readfrom_small_fast(self, file, &cap_size);
+        if (result != 2) {
+            return result;
+        }
+    }
+
+    /* Never read more than limit. */
+    read_size = Py_MIN(read_size, cap_size);
+    assert(read_size > 0);
+
+    Py_ssize_t current_size = PyBytes_GET_SIZE(self->buf);
+    if (PY_SSIZE_T_MAX - read_size - current_size > 0)
+        current_size += read_size;
+    else {
+        current_size = PY_SSIZE_T_MAX;
+    }
+    if (_PyBytes_Resize(&self->buf, current_size)) {
+        return -1;
+    }
+    Py_ssize_t bytes_read = 0;
+    Py_ssize_t found_eof = 0;
+    while (true) {
+        /* Expand buffer if needed. */
+        if (self->string_size >= current_size) {
+            Py_ssize_t target_size = _bytesio_new_buffersize(current_size);
+            if (target_size > PY_SSIZE_T_MAX || target_size <= 0) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "unbounded read returned more bytes "
+                                "than a Python bytes object can hold");
+                return -1;
+            }
+            if (_PyBytes_Resize(&self->buf, target_size)) {
+                return -1;
+            }
+            current_size = target_size;
+            read_size = target_size - current_size;
+        }
+        // DEBUG: printf("cs: %zd, ss: %zd, cap: %zd, read: %zd\n", current_size, self->string_size, cap_size, bytes_read);
+        read_size = Py_MIN(current_size - self->string_size, cap_size - bytes_read);
+        assert(read_size > 0); // Should always be reading some bytes.
+        assert(self->string_size + read_size <= current_size);
+        Py_ssize_t result = _Py_read(file,
+                                     PyBytes_AS_STRING(self->buf) + self->string_size,
+                                     read_size);
+        if (result == -1) {
+            // Blocking -> early exit without error.
+            if (errno == EAGAIN) {
+                PyErr_Clear();
+                break;
+            }
+            return  -1;
+        }
+        // Found EOF.
+        if (result == 0) {
+            found_eof = 1;
+            break;
+        }
+        assert(result >= 0); // Should have got bytes
+        self->string_size += result;
+        bytes_read += result;
+        assert(bytes_read <= cap_size); // Shold
+        if (bytes_read >= cap_size) {
+            found_eof = 0;
+            break;
+        }
+    }
+    // FIXME? There could be quite a bit of space between current_size and
+    // self->string_size, should this downsize then?
+    //
+    // yea: Save excess memory
+    // nay: Efficient pre-allocated buffer reuse if long lived, getting out the
+    //      bytes() will do anyways
+    return found_eof;
+}
+
 /*[clinic input]
 _io.BytesIO.readline
     size: Py_ssize_t(accept={int, NoneType}) = -1
@@ -1027,6 +1229,7 @@ static struct PyMethodDef bytesio_methods[] = {
     _IO_BYTESIO_WRITE_METHODDEF
     _IO_BYTESIO_WRITELINES_METHODDEF
     _IO_BYTESIO_READ1_METHODDEF
+    _IO_BYTESIO_READFROM_METHODDEF
     _IO_BYTESIO_READINTO_METHODDEF
     _IO_BYTESIO_READLINE_METHODDEF
     _IO_BYTESIO_READLINES_METHODDEF
diff --git a/Modules/_io/clinic/bytesio.c.h b/Modules/_io/clinic/bytesio.c.h
index 5528df952c33fb..797fdf1027463b 100644
--- a/Modules/_io/clinic/bytesio.c.h
+++ b/Modules/_io/clinic/bytesio.c.h
@@ -233,6 +233,107 @@ _io_BytesIO_read1(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
     return return_value;
 }
 
+PyDoc_STRVAR(_io_BytesIO_readfrom__doc__,
+"readfrom($self, file, /, *, estimate=-1, limit=-1)\n"
+"--\n"
+"\n"
+"Efficiently read from the provided file and return True if hit end of file.\n"
+"\n"
+"Returns True if and only if a read into a non-zero length buffer returns 0\n"
+"bytes. On most systems this indicates end of file / stream.\n"
+"\n"
+"FIXME?: Allow fileobj that provides readinto.?\n"
+"FIXME?:Allow fileobj that only has read?\n"
+"\n"
+"If a readinto call raises NonBlockingError or returns None, data returned to\n"
+"that point will be stored in buffer, and will return False. For other exceptions\n"
+"while reading, as much data as possible will be in the buffer.\n"
+"\n"
+"FIXME: BlockingIOError contains data from partial reads. Append it.\n"
+"    -> Include test that no data is lost w/ multiple repeated blocks\n"
+"        (There is one already in tests, make sure this is exercised and passes\n"
+"         it)\n"
+"FIXME: Does this need to document that all reads are Limited to PY_SSIZE_T_MAX.\n"
+"FIXME? It would be nice if this could support a timeout, but probably a feature\n"
+"       for later.");
+
+#define _IO_BYTESIO_READFROM_METHODDEF    \
+    {"readfrom", _PyCFunction_CAST(_io_BytesIO_readfrom), METH_FASTCALL|METH_KEYWORDS, _io_BytesIO_readfrom__doc__},
+
+static int
+_io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
+                          Py_ssize_t limit);
+
+static PyObject *
+_io_BytesIO_readfrom(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(estimate), &_Py_ID(limit), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"", "estimate", "limit", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "readfrom",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[3];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+    int file;
+    Py_ssize_t estimate = -1;
+    Py_ssize_t limit = -1;
+    int _return_value;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    file = PyLong_AsInt(args[0]);
+    if (file == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    if (args[1]) {
+        if (!_Py_convert_optional_to_ssize_t(args[1], &estimate)) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
+    }
+    if (!_Py_convert_optional_to_ssize_t(args[2], &limit)) {
+        goto exit;
+    }
+skip_optional_kwonly:
+    _return_value = _io_BytesIO_readfrom_impl((bytesio *)self, file, estimate, limit);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_io_BytesIO_readline__doc__,
 "readline($self, size=-1, /)\n"
 "--\n"
@@ -535,4 +636,4 @@ _io_BytesIO___init__(PyObject *self, PyObject *args, PyObject *kwargs)
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=8a5e153bc7584b55 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=bab5a4081d518e22 input=a9049054013a1b77]*/

From fd457a80a37eb1a4d938d7194b2e2dac045d0095 Mon Sep 17 00:00:00 2001
From: Cody Maloney <cmaloney@theoreticalchaos.com>
Date: Sun, 9 Feb 2025 17:40:58 -0800
Subject: [PATCH 2/8] Working

---
 Lib/_pyio.py          | 96 +++++++++++++++++++------------------------
 Modules/_io/bytesio.c | 59 +++++++++++++++-----------
 2 files changed, 79 insertions(+), 76 deletions(-)

diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 4ebed101d688e8..a3016b9d07d760 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -940,68 +940,60 @@ def readfrom(self, file, /, *, estimate=None, limit=None):
         # I/O can be completed with two read() calls (one for all data, one
         # for EOF) without needing to resize the buffer.
         # FIXME(cmaloney): This should probably be a memoryview....
+        target_read = None
         if estimate is not None:
-            estimate = int(estimate) + 1
+            target_read = int(estimate) + 1
+        else:
+            target_read = DEFAULT_BUFFER_SIZE
 
         # Cap to limit
         if limit is not None:
             limit = int(limit)
-            if limit <= 0:
+            if limit == 0:
+                return False
+            if limit < 0:
                 raise ValueError(f"limit must be larger than 0, got {limit}")
 
-            if target_read is not None:
-                target_read = min(target_read, limit)
+        # Expand buffer to get target read in one read when possible.
+        if limit is not None:
+            target_read = min(target_read, limit)
 
-        if self._pos - len(self._buffer) < estimate:
+        # Expand so target read definitely fits.
+        if len(self._buffer) < target_read + self._pos:
             self._buffer.resize(self._pos + target_read)
 
-        # FIXME(cmaloney): Expand buffer if needed
+        found_eof = False
         start_pos = self._pos
         try:
-            while True:
-                bytes_read = self._pos - start_pos
-                if limit is not None and limit <= bytes_read:
-                    return False
-
-                if target_read <= 0:
-                    # FIXME(cmaloney): Check this matces
-                    self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read))
-
-                if limit is not None and bytes_read >= limit:
-                    return False
-
-                # Make sure there is space for the read.
-                if target_read :
-
-
-                # Cap target read
-                # Hit cap, not EOF.
-                bytes_read = self._pos - start_pos
-                if bytes_read >= cap:
-                    return False
-
-                read_size = len(self._buffer) - self._pos
-
-                # Calculate next read size.
-                if self._pos >= len(self._buffer):
-                    self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read))
+            while n := os.readinto(file, memoryview(self._buffer)[self._pos:]):
+                self._pos += n
+                # Expand buffer if needed.
+                if len(self._buffer) - self._pos <= 0:
+                    bytes_read = self._pos - start_pos
+                    target_read = _new_buffersize(bytes_read)
+
+                    # Keep buffer size <= limit, so only need to check against
+                    # limit when resizing.
+                    if limit is not None:
+                        remaining = limit - bytes_read
+                        if remaining <= 0:
+                            assert remaining == 0, "should never pass limit"
+                            break
+                        target_read = min(remaining, target_read)
+
+                    self._buffer.resize(target_read + len(self._buffer))
 
-                if read_size <= 0:
-                    # Fill remaining buffer, but never read more than cap.
-                    read_size = len(self._buffer) - self._pos
-                    read_size = min(start_pos - self, cap - bytes_read)
+            else:
+                assert len(self._buffer) - self._pos >= 1, \
+                    "os.readinto buffer size 0 will result in erroneous EOF / returns 0"
+                found_eof = True
 
-                n = os.readinto(file, memoryview(self._buffer)[self._pos:])
-                self._pos += n
-                bytes_read += n
-                read_size -= n
-                if read_size <= 0:
-                    read_size = _new_buffersize(bytes_read)
-            assert len(result) - bytes_read >= 1, \
-                "os.readinto buffer size 0 will result in erroneous EOF / returns 0"
         except BlockingIOError:
-            if not bytes_read:
-                return None
+            pass
+
+        # Drop the unused, over-allocated tail so the buffer ends at _pos.
+        self._buffer.resize(self._pos)
+        return found_eof
 
     def write(self, b):
         if self.closed:
@@ -1754,12 +1746,10 @@ def readall(self):
                     pass
 
         bio = BytesIO()
-        try:
-            bio.readfrom(self._fd, estimate=estimate)
-            return bio.getvalue()
-        except BlockingIOError:
-            result = bio.getvalue()
-            return result if result else None
+        found_eof = bio.readfrom(self._fd, estimate=estimate)
+        result = bio.getvalue()
+        # No limit in readfrom, so not finding eof indicates blocked.
+        return result if result or found_eof else None
 
 
     def readinto(self, buffer):
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c
index ae076d1cec7b05..234594c69cc0fa 100644
--- a/Modules/_io/bytesio.c
+++ b/Modules/_io/bytesio.c
@@ -494,10 +494,10 @@ read loop if more than one buffer of data.
 
 -1 == error, 0 == hit cap or blocked, exit, 1 == hit eof / True return, 2 == read more
 */
-static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *cap_size) {
-    assert(*cap_size > 0 ** "Must attempt to read at least one byte.");
+static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *limit) {
+    assert(*limit > 0 && "Must attempt to read at least one byte.");
     char local_buffer[STACK_BUFER_SIZE];
-    Py_ssize_t read_size = Py_MIN(STACK_BUFER_SIZE, *cap_size);
+    Py_ssize_t read_size = Py_MIN(STACK_BUFER_SIZE, *limit);
     Py_ssize_t result = _Py_read(fd, local_buffer, read_size);
 
     /* Hit EOF in a single read, return True. */
@@ -528,10 +528,10 @@ static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *cap_s
         return -1;
     }
     /* Hit cap, nothing left to do. */
-    if (result == *cap_size) {
+    if (result == *limit) {
         return 0;
     }
-    *cap_size -= result;
+    *limit -= result;
     return 2;
 }
 
@@ -576,9 +576,15 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
     if (check_exports(self)) {
         return -1;
     }
-    /* Cap all reads to PY_SSIZE_T_MAX */
-    Py_ssize_t cap_size = Py_MIN(Py_MAX(limit, 0), PY_SSIZE_T_MAX);
-    assert(cap_size > 0);
+    /* Limit all reads to PY_SSIZE_T_MAX */
+    if (limit < 0) {
+        limit = PY_SSIZE_T_MAX;
+    } else if (limit == 0) {
+        // Limit == 0. no read.
+        // FIXME(cmaloney): Should this guarantee at least one read? (os.readinto technically accepts 0 length...)
+        return 0;
+    }
+    assert(limit > 0);
 
     /* Try and get estimated_size in a single read. */
     Py_ssize_t read_size = DEFAULT_BUFFER_SIZE;
@@ -589,17 +595,17 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
             calls (one for all data, one for EOF) without needing
             to resize the buffer. */
         read_size = estimate + ((estimate <= PY_SSIZE_T_MAX - 1) ? 1 : 0);
-    } else if (estimate == 0 || cap_size < STACK_BUFER_SIZE) {
+    } else if (estimate == 0 || limit < STACK_BUFER_SIZE) {
         /* A number of things in the normal path expect no data, use a small
            temp buffer for those, only expanding buffer if absolutely needed. */
-        Py_ssize_t result = _bytesio_readfrom_small_fast(self, file, &cap_size);
+        Py_ssize_t result = _bytesio_readfrom_small_fast(self, file, &limit);
         if (result != 2) {
             return result;
         }
     }
 
     /* Never read more than limit. */
-    read_size = Py_MIN(read_size, cap_size);
+    read_size = Py_MIN(read_size, limit);
     assert(read_size > 0);
 
     Py_ssize_t current_size = PyBytes_GET_SIZE(self->buf);
@@ -616,21 +622,28 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
     while (true) {
         /* Expand buffer if needed. */
         if (self->string_size >= current_size) {
-            Py_ssize_t target_size = _bytesio_new_buffersize(current_size);
-            if (target_size > PY_SSIZE_T_MAX || target_size <= 0) {
+            if (current_size >= PY_SSIZE_T_MAX) {
                 PyErr_SetString(PyExc_OverflowError,
-                                "unbounded read returned more bytes "
-                                "than a Python bytes object can hold");
-                return -1;
+                    "unbounded read returned more bytes "
+                    "than a Python bytes object can hold");
+                return -1;
             }
-            if (_PyBytes_Resize(&self->buf, target_size)) {
+            Py_ssize_t target_read = _bytesio_new_buffersize(bytes_read);
+            /* Never read more than limit bytes_read. */
+            target_read = Py_MIN(target_read, limit - bytes_read);
+
+            /* Buffer can't get larger than PY_SSIZE_T_MAX */
+            if (PY_SSIZE_T_MAX - current_size < target_read) {
+                target_read = PY_SSIZE_T_MAX - current_size;
+            }
+
+            current_size += target_read;
+            if (_PyBytes_Resize(&self->buf, current_size)) {
                 return -1;
             }
-            current_size = target_size;
-            read_size = target_size - current_size;
         }
-        // DEBUG: printf("cs: %zd, ss: %zd, cap: %zd, read: %zd\n", current_size, self->string_size, cap_size, bytes_read);
-        read_size = Py_MIN(current_size - self->string_size, cap_size - bytes_read);
+        // DEBUG: printf("cs: %zd, ss: %zd, limit: %zd, read: %zd\n", current_size, self->string_size, limit, bytes_read);
+        read_size = Py_MIN(current_size - self->string_size, limit - bytes_read);
         assert(read_size > 0); // Should always be reading some bytes.
         assert(self->string_size + read_size <= current_size);
         Py_ssize_t result = _Py_read(file,
@@ -652,8 +665,8 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
         assert(result >= 0); // Should have got bytes
         self->string_size += result;
         bytes_read += result;
-        assert(bytes_read <= cap_size); // Shold
-        if (bytes_read >= cap_size) {
+        assert(bytes_read <= limit); // Should never exceed limit.
+        if (bytes_read >= limit) {
             found_eof = 0;
             break;
         }

From 64abc8484be76cd899339e01cc7624415ea44c0e Mon Sep 17 00:00:00 2001
From: Cody Maloney <cmaloney@theoreticalchaos.com>
Date: Sun, 9 Feb 2025 18:41:06 -0800
Subject: [PATCH 3/8] WIP: Moving towards FileIO.readall using BytesIO.readfrom

---
 Lib/_pyio.py          |   2 +-
 Modules/_io/bytesio.c |   1 +
 Modules/_io/fileio.c  | 106 ++++++++++++++----------------------------
 3 files changed, 36 insertions(+), 73 deletions(-)

diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index a3016b9d07d760..fa896890dffe47 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -949,7 +949,7 @@ def readfrom(self, file, /, *, estimate=None, limit=None):
         # Cap to limit
         if limit is not None:
             limit = int(limit)
-            if limit == 0:
+            if limit == 0:  # Nothing to read.
                 return False
             if limit < 0:
                 raise ValueError(f"limit must be larger than 0, got {limit}")
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c
index 234594c69cc0fa..571204f4f2bbef 100644
--- a/Modules/_io/bytesio.c
+++ b/Modules/_io/bytesio.c
@@ -570,6 +570,7 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
                           Py_ssize_t limit)
 /*[clinic end generated code: output=71dcfcf7e9a50527 input=9bce10ea48db6415]*/
 {
+    /* FIXME: Cap to _PY_READ_MAX */
     if (check_closed(self)) {
         return -1;
     }
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index 89f1cfe6b20935..a4e7ab7c7d7ca3 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -735,40 +735,15 @@ static PyObject *
 _io_FileIO_readall_impl(fileio *self)
 /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/
 {
-    Py_off_t pos, end;
-    PyObject *result;
-    Py_ssize_t bytes_read = 0;
-    Py_ssize_t n;
-    size_t bufsize;
+    PyObject* estimate_obj = Py_None;
+    PyObject* result = NULL;
 
     if (self->fd < 0) {
         return err_closed();
     }
 
-    if (self->stat_atopen != NULL && self->stat_atopen->st_size < _PY_READ_MAX) {
-        end = (Py_off_t)self->stat_atopen->st_size;
-    }
-    else {
-        end = -1;
-    }
-    if (end <= 0) {
-        /* Use a default size and resize as needed. */
-        bufsize = SMALLCHUNK;
-    }
-    else {
-        /* This is probably a real file. */
-        if (end > _PY_READ_MAX - 1) {
-            bufsize = _PY_READ_MAX;
-        }
-        else {
-            /* In order to detect end of file, need a read() of at
-               least 1 byte which returns size 0. Oversize the buffer
-               by 1 byte so the I/O can be completed with two read()
-               calls (one for all data, one for EOF) without needing
-               to resize the buffer. */
-            bufsize = (size_t)end + 1;
-        }
-
+    if (self->stat_atopen != NULL && self->stat_atopen->st_size >= 0) {
+        Py_ssize_t estimate = self->stat_atopen->st_size;
         /* While a lot of code does open().read() to get the whole contents
            of a file it is possible a caller seeks/reads a ways into the file
            then calls readall() to get the rest, which would result in allocating
@@ -785,58 +760,45 @@ _io_FileIO_readall_impl(fileio *self)
             _Py_END_SUPPRESS_IPH
             Py_END_ALLOW_THREADS
 
-            if (end >= pos && pos >= 0 && (end - pos) < (_PY_READ_MAX - 1)) {
-                bufsize = (size_t)(end - pos) + 1;
+            if (estimate >= pos) {
+                estimate -= pos;
             }
         }
+        estimate_obj = PyLong_FromSsize_t(estimate);
+        if(!estimate_obj) {
+            return NULL;
+        }
     }
 
-
-    result = PyBytes_FromStringAndSize(NULL, bufsize);
-    if (result == NULL)
+    /* Use BytesIO.readfrom(fd, estimate=estimate) */
+    PyObject *bytesio_class = PyImport_ImportModuleAttrString("io", "BytesIO");
+    if (!bytesio_class) {
         return NULL;
+    }
+    PyObject *bio = _Py_CallNoArgs(bytesio_class);
+    Py_DECREF(bytesioclass);
+    if (!bio) {
+        return  NULL;
+    }
 
-    while (1) {
-        if (bytes_read >= (Py_ssize_t)bufsize) {
-            bufsize = new_buffersize(self, bytes_read);
-            if (bufsize > PY_SSIZE_T_MAX || bufsize <= 0) {
-                PyErr_SetString(PyExc_OverflowError,
-                                "unbounded read returned more bytes "
-                                "than a Python bytes object can hold");
-                Py_DECREF(result);
-                return NULL;
-            }
-
-            if (PyBytes_GET_SIZE(result) < (Py_ssize_t)bufsize) {
-                if (_PyBytes_Resize(&result, bufsize) < 0)
-                    return NULL;
-            }
-        }
-
-        n = _Py_read(self->fd,
-                     PyBytes_AS_STRING(result) + bytes_read,
-                     bufsize - bytes_read);
-
-        if (n == 0)
-            break;
-        if (n == -1) {
-            if (errno == EAGAIN) {
-                PyErr_Clear();
-                if (bytes_read > 0)
-                    break;
-                Py_DECREF(result);
-                Py_RETURN_NONE;
-            }
-            Py_DECREF(result);
-            return NULL;
-        }
-        bytes_read += n;
+    // FIXME: self._fd, estimate=estimate
+    // self->fd, estimate=estimate, limit=_PY_READ_MAX
+    PyObject *args[] = {estimate_obj, }
+    PyObject *found_eof = PyObject_VectorcallMethod(bytesio_class, "readfrom");
+    if (!found_eof) {
+        Py_DCREF(bytesio_class);
+        return NULL;
+    }
+    PyObject *result = PyObject_CallMethodNoArgs(bytesio_class, &_Py_ID(getvalue));
+    if (!getvalue) {
+        return NULL;
     }
 
-    if (PyBytes_GET_SIZE(result) > bytes_read) {
-        if (_PyBytes_Resize(&result, bytes_read) < 0)
-            return NULL;
+    Py_DECREF(bytesio_class);
+    if (!bio) {
+        return NULL;
     }
+    _PyObject_Call(bio, )
     return result;
 }
 

From 5f6bee9e6bc756012d079a75ac3ee0af2a5577d3 Mon Sep 17 00:00:00 2001
From: Cody Maloney <cmaloney@theoreticalchaos.com>
Date: Wed, 12 Feb 2025 11:32:02 -0800
Subject: [PATCH 4/8] WIP: FileIO.readall -> BytesIO._readfrom

---
 Modules/_io/fileio.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index a4e7ab7c7d7ca3..da4e1330997da3 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -735,6 +735,7 @@ static PyObject *
 _io_FileIO_readall_impl(fileio *self)
 /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/
 {
+    Py_ssize_t pos = 0;
     PyObject* estimate_obj = Py_None;
     PyObject* result = NULL;
 
@@ -749,7 +750,7 @@ _io_FileIO_readall_impl(fileio *self)
            then calls readall() to get the rest, which would result in allocating
            more than required. Guard against that for larger files where we expect
            the I/O time to dominate anyways while keeping small files fast. */
-        if (bufsize > LARGE_BUFFER_CUTOFF_SIZE) {
+        if (estimate > LARGE_BUFFER_CUTOFF_SIZE) {
             Py_BEGIN_ALLOW_THREADS
             _Py_BEGIN_SUPPRESS_IPH
 #ifdef MS_WINDOWS
@@ -770,6 +771,13 @@ _io_FileIO_readall_impl(fileio *self)
         }
     }
 
+    /*
+    bio = io.BytesIO();
+    found_eof = bio.readfrom(self->fd, estimate=estimate)
+    result = bio.getvalue()
+    return result if result or found_eof else None
+    */
+
     /* Use BytesIO.readfrom(fd, estimate=estimate) */
     PyObject *bytesio_class = PyImport_ImportModuleAttrString("io", "BytesIO");
     if (!bytesio_class) {

From 258025dff48ba9ef3f401ba8fb0cede9dca31ced Mon Sep 17 00:00:00 2001
From: Cody Maloney <cmaloney@theoreticalchaos.com>
Date: Wed, 12 Feb 2025 16:33:20 -0800
Subject: [PATCH 5/8] io.FileIO.readall working via io.BytesIO.readfrom

---
 Modules/_io/fileio.c | 103 ++++++++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 45 deletions(-)

diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index da4e1330997da3..94676d5e339192 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -703,25 +703,6 @@ _io_FileIO_readinto_impl(fileio *self, PyTypeObject *cls, Py_buffer *buffer)
     return PyLong_FromSsize_t(n);
 }
 
-static size_t
-new_buffersize(fileio *self, size_t currentsize)
-{
-    size_t addend;
-
-    /* Expand the buffer by an amount proportional to the current size,
-       giving us amortized linear-time behavior.  For bigger sizes, use a
-       less-than-double growth factor to avoid excessive allocation. */
-    assert(currentsize <= PY_SSIZE_T_MAX);
-    if (currentsize > LARGE_BUFFER_CUTOFF_SIZE)
-        addend = currentsize >> 3;
-    else
-        addend = 256 + currentsize;
-    if (addend < SMALLCHUNK)
-        /* Avoid tiny read() calls. */
-        addend = SMALLCHUNK;
-    return addend + currentsize;
-}
-
 /*[clinic input]
 _io.FileIO.readall
 
@@ -737,7 +718,11 @@ _io_FileIO_readall_impl(fileio *self)
 {
     Py_ssize_t pos = 0;
     PyObject* estimate_obj = Py_None;
-    PyObject* result = NULL;
+    PyObject *args[3] = {NULL, NULL, NULL};
+    PyObject *fn_name = NULL;
+    PyObject *keyword = NULL;
+    PyObject *result = NULL;
+    PyObject *found_eof = NULL;
 
     if (self->fd < 0) {
         return err_closed();
@@ -771,42 +756,70 @@ _io_FileIO_readall_impl(fileio *self)
         }
     }
 
-    /*
-    bio = io.BytesIO();
-    found_eof = bio.readfrom(self->fd, estimate=estimate)
-    result = bio.getvalue()
-    return result if result or found_eof else None
-    */
-
-    /* Use BytesIO.readfrom(fd, estimate=estimate) */
-    PyObject *bytesio_class = PyImport_ImportModuleAttrString("io", "BytesIO");
+    /* bio = io.BytesIO();
+       found_eof = bio.readfrom(self->fd, estimate=estimate) */
+    PyObject *bytesio_class = PyImport_ImportModuleAttrString("_io", "BytesIO");
     if (!bytesio_class) {
+        Py_DECREF(estimate_obj);
         return NULL;
     }
-    PyObject *bio = _Py_CallNoArgs(bytesio_class);
-    Py_DECREF(bytesioclass);
-    if (!bio) {
+    args[2] = estimate_obj;
+    estimate_obj = NULL;
+
+    args[0] = PyObject_CallNoArgs(bytesio_class);
+    Py_DECREF(bytesio_class);
+    bytesio_class = NULL;
+    if (!args[0]) {
+        Py_DECREF(estimate_obj);
         return  NULL;
     }
 
-    // FIXME: self._fd, estimate=estimate
-    // self->fd, estimate=estimate, limit=_PY_READ_MAX
-    PyObject *args[] = {estimate_obj, }
-    PyObject *found_eof = PyObject_VectorcallMethod(bytesio_class, "readfrom");
+    args[1] = PyLong_FromLong(self->fd);
+    if(!args[1]) {
+        goto leave;
+    }
+    fn_name = PyUnicode_InternFromString("readfrom");
+    if (!fn_name) {
+        goto leave;
+    }
+    keyword = Py_BuildValue("(s)", "estimate");
+    if (!keyword) {
+        goto leave;
+    }
+    found_eof = PyObject_VectorcallMethod(
+        fn_name,
+        args,
+        2 | PY_VECTORCALL_ARGUMENTS_OFFSET,
+        keyword
+    );
     if (!found_eof) {
-        Py_DCREF(bytesio_class);
-        return NULL;
+        goto leave;
     }
-    PyObject *result = PyObject_CallMethodNoArgs(bytesio_class, &_Py_ID(getvalue));
-    if (!getvalue) {
-        return NULL;
+
+    /* result = bio.getvalue()
+       return result if result or found_eof else None */
+    Py_DECREF(keyword);
+    keyword = PyUnicode_InternFromString("getvalue");
+    if (!keyword) {
+        goto leave;
+    }
+    result = PyObject_CallMethodNoArgs(args[0], keyword);
+    if (!result) {
+        goto leave;
     }
 
-    Py_DECREF(bytesio_class);
-    if (!bio) {
-        return NULL;
+    if (!PyBool_Check(found_eof) && !PyBool_Check(result)) {
+        Py_DECREF(result);
+        result = Py_None;
     }
-    _PyObject_Call(bio, )
+
+leave:
+    Py_XDECREF(args[0]);
+    Py_XDECREF(args[1]);
+    Py_XDECREF(args[2]);
+    Py_XDECREF(fn_name);
+    Py_XDECREF(keyword);
+    Py_XDECREF(found_eof);
     return result;
 }
 

From 4b3664f91e1a6e2ea4b0ba9857f6a98bb88a43cf Mon Sep 17 00:00:00 2001
From: Cody Maloney <cmaloney@theoreticalchaos.com>
Date: Wed, 12 Feb 2025 16:58:56 -0800
Subject: [PATCH 6/8] Non-blocking readfrom working

---
 Lib/_pyio.py         | 2 +-
 Modules/_io/fileio.c | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index fa896890dffe47..393720d69d87a9 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -991,7 +991,7 @@ def readfrom(self, file, /, *, estimate=None, limit=None):
         except BlockingIOError:
             pass
 
-        # Buffer must be
+        # Remove all excess bytes.
         self._buffer.resize(self._pos)
         return found_eof
 
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index 94676d5e339192..80c9649598a800 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -716,7 +716,6 @@ static PyObject *
 _io_FileIO_readall_impl(fileio *self)
 /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/
 {
-    Py_ssize_t pos = 0;
     PyObject* estimate_obj = Py_None;
     PyObject *args[3] = {NULL, NULL, NULL};
     PyObject *fn_name = NULL;
@@ -729,6 +728,7 @@ _io_FileIO_readall_impl(fileio *self)
     }
 
     if (self->stat_atopen != NULL && self->stat_atopen->st_size >= 0) {
+        Py_ssize_t pos = 0;
         Py_ssize_t estimate = self->stat_atopen->st_size;
         /* While a lot of code does open().read() to get the whole contents
            of a file it is possible a caller seeks/reads a ways into the file
@@ -808,7 +808,8 @@ _io_FileIO_readall_impl(fileio *self)
         goto leave;
     }
 
-    if (!PyBool_Check(found_eof) && !PyBool_Check(result)) {
+    /* Read was blocked (didn't get to end, and didn't find data) */
+    if (!PyObject_IsTrue(result) && !PyObject_IsTrue(found_eof)) {
         Py_DECREF(result);
         result = Py_None;
     }

From b3ca823715b3bd8e3b50ea9d93d94b4ccc5ebe28 Mon Sep 17 00:00:00 2001
From: Cody Maloney <cmaloney@theoreticalchaos.com>
Date: Wed, 12 Feb 2025 17:55:43 -0800
Subject: [PATCH 7/8] WIP: readfrom for general file objects with readinto,
 read

---
 Lib/_compression.py | 12 ++++--------
 Lib/_pyio.py        | 12 +++++++++++-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/Lib/_compression.py b/Lib/_compression.py
index e8b70aa0a3e680..d3530d32cf6d3b 100644
--- a/Lib/_compression.py
+++ b/Lib/_compression.py
@@ -111,14 +111,10 @@ def read(self, size=-1):
         return data
 
     def readall(self):
-        chunks = []
-        # sys.maxsize means the max length of output buffer is unlimited,
-        # so that the whole input buffer can be decompressed within one
-        # .decompress() call.
-        while data := self.read(sys.maxsize):
-            chunks.append(data)
-
-        return b"".join(chunks)
+        # FIXME(cmaloney): non blocking support?
+        bio = io.BytesIO()
+        bio.readfrom(self)
+        return bio.getvalue()
 
     # Rewind the file to the beginning of the data stream.
     def _rewind(self):
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 393720d69d87a9..c0a1553a0a7640 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -962,10 +962,20 @@ def readfrom(self, file, /, *, estimate=None, limit=None):
         if len(self._buffer) < target_read + self._pos:
             self._buffer.resize(self._pos + target_read)
 
+        # File descriptor
+        if isinstance(file, int):
+            read_fn = lambda: os.readinto(file, memoryview(self._buffer)[self._pos:])
+        elif file_readinto := getattr(file, "readinto", None):
+            read_fn = lambda: file_readinto(memoryview(self._buffer)[self._pos:])
+        elif file_read := getattr(file, "read", None):
+            def read_fn():
+                data = file_read(len(self._buffer) - self._pos)
+                self._buffer[self._pos:self._pos + len(data)] = data
+
         found_eof = False
         start_pos = self._pos
         try:
-            while n := os.readinto(file, memoryview(self._buffer)[self._pos:]):
+            while n := read_fn():
                 self._pos += n
                 # Expand buffer if needed.
                 if len(self._buffer) - self._pos <= 0:

From deb12cc7af2948c83885e7d8c3c34dd1001d6a23 Mon Sep 17 00:00:00 2001
From: Cody Maloney <cmaloney@theoreticalchaos.com>
Date: Thu, 13 Feb 2025 13:01:55 -0800
Subject: [PATCH 8/8] tweak comments

---
 Lib/_pyio.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index c0a1553a0a7640..19445848e12f07 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -939,7 +939,6 @@ def readfrom(self, file, /, *, estimate=None, limit=None):
         # byte which returns size 0. Oversize the buffer by 1 byte so the
         # I/O can be completed with two read() calls (one for all data, one
         # for EOF) without needing to resize the buffer.
-        # FIXME(cmaloney): This should probably be a memoryview....
         target_read = None
         if estimate is not None:
             target_read = int(estimate) + 1
@@ -954,16 +953,14 @@ def readfrom(self, file, /, *, estimate=None, limit=None):
             if limit < 0:
                 raise ValueError(f"limit must be larger than 0, got {limit}")
 
-        # Expand buffer to get target read in one read when possible.
         if limit is not None:
             target_read = min(target_read, limit)
 
-        # Expand so target read definitely fits.
+        # Expand buffer to get target read in one read when possible.
         if len(self._buffer) < target_read + self._pos:
             self._buffer.resize(self._pos + target_read)
 
-        # File descriptor
-        if isinstance(file, int):
+        if isinstance(file, int):  # File descriptor
             read_fn = lambda: os.readinto(file, memoryview(self._buffer)[self._pos:])
         elif file_readinto := getattr(file, "readinto", None):
             read_fn = lambda: file_readinto(memoryview(self._buffer)[self._pos:])