From e3502910899112eb9e96aeb6ca4930e6e3640e8a Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 8 Feb 2025 16:07:46 -0800 Subject: [PATCH 1/8] WIP: Pyio readfrom --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 + Lib/_pyio.py | 111 +++++++--- Lib/subprocess.py | 9 +- Modules/_io/bytesio.c | 203 ++++++++++++++++++ Modules/_io/clinic/bytesio.c.h | 103 ++++++++- 8 files changed, 402 insertions(+), 31 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 90214a314031d1..b07db953e2c6bb 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -916,6 +916,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(entrypoint)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(env)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(errors)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(estimate)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(event)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eventmask)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_type)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 97a75d0c46c867..a93cc79bda63c6 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -405,6 +405,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(entrypoint) STRUCT_FOR_ID(env) STRUCT_FOR_ID(errors) + STRUCT_FOR_ID(estimate) STRUCT_FOR_ID(event) STRUCT_FOR_ID(eventmask) STRUCT_FOR_ID(exc_type) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 4f928cc050bf8e..81bd0aefc668ee 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -914,6 +914,7 @@ extern "C" { INIT_ID(entrypoint), \ INIT_ID(env), \ INIT_ID(errors), \ + INIT_ID(estimate), \ INIT_ID(event), \ INIT_ID(eventmask), \ INIT_ID(exc_type), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 5b78d038fc1192..cad8f2731fc222 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1416,6 +1416,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(estimate); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(event); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/_pyio.py b/Lib/_pyio.py index f7370dff19efc8..4ebed101d688e8 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -926,6 +926,83 @@ def read1(self, size=-1): """ return self.read(size) + def readfrom(self, file, /, *, estimate=None, limit=None): + """Efficiently read from the provided file and return True if hit end. + + Returns True if and only if a read into a non-zero length buffer + returns 0 bytes. On most systems this indicates end of file / stream. + """ + if self.closed: + raise ValueError("read from closed file") + + # In order to detect end of file, need a read() of at least 1 + # byte which returns size 0. Oversize the buffer by 1 byte so the + # I/O can be completed with two read() calls (one for all data, one + # for EOF) without needing to resize the buffer. + # FIXME(cmaloney): This should probably be a memoryview.... + if estimate is not None: + estimate = int(estimate) + 1 + + # Cap to limit + if limit is not None: + limit = int(limit) + if limit <= 0: + raise ValueError(f"limit must be larger than 0, got {limit}") + + if target_read is not None: + target_read = min(target_read, limit) + + if self._pos - len(self._buffer) < estimate: + self._buffer.resize(self._pos + target_read) + + # FIXME(cmaloney): Expand buffer if needed + start_pos = self._pos + try: + while True: + bytes_read = self._pos - start_pos + if limit is not None and limit <= bytes_read: + return False + + if target_read <= 0: + # FIXME(cmaloney): Check this matces + self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read)) + + if limit is not None and bytes_read >= limit: + return False + + # Make sure there is space for the read. + if target_read : + + + # Cap target read + # Hit cap, not EOF. + bytes_read = self._pos - start_pos + if bytes_read >= cap: + return False + + read_size = len(self._buffer) - self._pos + + # Calculate next read size. + if self._pos >= len(self._buffer): + self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read)) + + if read_size <= 0: + # Fill remaining buffer, but never read more than cap. + read_size = len(self._buffer) - self._pos + read_size = min(start_pos - self, cap - bytes_read) + + n = os.readinto(file, memoryview(self._buffer)[self._pos:]) + self._pos += n + bytes_read += n + read_size -= n + if read_size <= 0: + read_size = _new_buffersize(bytes_read) + assert len(result) - bytes_read >= 1, \ + "os.readinto buffer size 0 will result in erroneous EOF / returns 0" + except BlockingIOError: + if not bytes_read: + return None + def write(self, b): if self.closed: raise ValueError("write to closed file") @@ -1666,38 +1743,24 @@ def readall(self): """ self._checkClosed() self._checkReadable() - if self._stat_atopen is None or self._stat_atopen.st_size <= 0: - bufsize = DEFAULT_BUFFER_SIZE - else: - # In order to detect end of file, need a read() of at least 1 - # byte which returns size 0. Oversize the buffer by 1 byte so the - # I/O can be completed with two read() calls (one for all data, one - # for EOF) without needing to resize the buffer. - bufsize = self._stat_atopen.st_size + 1 - - if self._stat_atopen.st_size > 65536: + estimate = None + if self._stat_atopen and self._stat_atopen.st_size >= 0: + estimate = self._stat_atopen.st_size + if estimate > 65536: try: pos = os.lseek(self._fd, 0, SEEK_CUR) - if self._stat_atopen.st_size >= pos: - bufsize = self._stat_atopen.st_size - pos + 1 + estimate = estimate - pos if estimate > pos else 0 except OSError: pass - result = bytearray(bufsize) - bytes_read = 0 + bio = BytesIO() try: - while n := os.readinto(self._fd, memoryview(result)[bytes_read:]): - bytes_read += n - if bytes_read >= len(result): - result.resize(_new_buffersize(bytes_read)) + bio.readfrom(self._fd, estimate=estimate) + return bio.getvalue() except BlockingIOError: - if not bytes_read: - return None + result = bio.getvalue() + return result if result else None - assert len(result) - bytes_read >= 1, \ - "os.readinto buffer size 0 will result in erroneous EOF / returns 0" - result.resize(bytes_read) - return bytes(result) def readinto(self, buffer): """Same as RawIOBase.readinto().""" diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 2044d2a42897e9..e0f449b72011dc 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -1921,12 +1921,9 @@ def _execute_child(self, args, executable, preexec_fn, close_fds, # Wait for exec to fail or succeed; possibly raising an # exception (limited in size) - errpipe_data = bytearray() - while True: - part = os.read(errpipe_read, 50000) - errpipe_data += part - if not part or len(errpipe_data) > 50000: - break + bio = io.BytesIO() + bio.readfrom(errpipe_read, estimate=0, limit=50_000) + errpipe_data = bio.getvalue() finally: # be sure the FD is closed no matter what os.close(errpipe_read) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index dc4e40b9f09a1d..ae076d1cec7b05 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -5,6 +5,9 @@ #include // offsetof() #include "_iomodule.h" + +#define STACK_BUFER_SIZE 1024 + /*[clinic input] module _io class _io.BytesIO "bytesio *" "clinic_state()->PyBytesIO_Type" @@ -465,6 +468,205 @@ _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size) return _io_BytesIO_read_impl(self, size); } +static size_t +_bytesio_new_buffersize(size_t bytes_read) +{ + size_t addend; + + /* Expand the buffer by an amount proportional to the current size, + giving us amortized linear-time behavior. For bigger sizes, use a + less-than-double growth factor to avoid excessive allocation. */ + assert(bytes_read <= PY_SSIZE_T_MAX); + if (bytes_read > 65536) + addend = bytes_read >> 3; + else + addend = 256 + bytes_read; + if (addend < 8 * 1024) + /* Avoid tiny read() calls. */ + addend = 8 * 1024; + return bytes_read + addend; +} + +/* Read from a fd where there is no data expected to be read. +This is faster (less allocations, less copies) when there is no data, at the +expense of slightly slower if there is actual data to read. Falls back to normal +read loop if more than one buffer of data. + +-1 == error, 0 == hit cap or blocked, exit, 1 == hit eof / True return, 2 == read more +*/ +static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *cap_size) { + assert(*cap_size > 0 ** "Must attempt to read at least one byte."); + char local_buffer[STACK_BUFER_SIZE]; + Py_ssize_t read_size = Py_MIN(STACK_BUFER_SIZE, *cap_size); + Py_ssize_t result = _Py_read(fd, local_buffer, read_size); + + /* Hit EOF in a single read, return True. */ + if (result == 0) { + return 1; + } + if (result == -1) { + /* BlockingIOError -> return False (didn't find EOF). */ + if (errno == EAGAIN) { + PyErr_Clear(); + return 0; + } + return -1; + } + + /* Got data, copy across to the buf, then proceed with normal read loop. + + FIXME? The temporary bytes object is an unnecessary copy + allocation. + yea: faster / less copies, remove some redundant checks + nay: resizing, appending, copying, updating pointers is a lot. */ + PyObject *bytes = PyBytes_FromStringAndSize(local_buffer, result); + if (!bytes) { + return -1; + } + result = write_bytes(self, bytes); + Py_DECREF(bytes); + if (result < 0) { + return -1; + } + /* Hit cap, nothing left to do. */ + if (result == *cap_size) { + return 0; + } + *cap_size -= result; + return 2; +} + + +/*[clinic input] +_io.BytesIO.readfrom -> bool + file: int + / + * + estimate: Py_ssize_t(accept={int, NoneType}) = -1 + limit: Py_ssize_t(accept={int, NoneType}) = -1 + +Efficiently read from the provided file and return True if hit end of file. + +Returns True if and only if a read into a non-zero length buffer returns 0 +bytes. On most systems this indicates end of file / stream. + +FIXME?: Allow fileobj that provides readinto.? +FIXME?:Allow fileobj that only has read? + +If a readinto call raises NonBlockingError or returns None, data returned to +that point will be stored in buffer, and will return False. For other exceptions +while reading, as much data as possible will be in the buffer. + +FIXME: BlockingIOError contains data from partial reads. Append it. + -> Include test that no data is lost w/ multiple repeated blocks + (There is one already in tests, make sure this is exercised and passes + it) +FIXME: Does this need to document that all reads are Limited to PY_SSIZE_T_MAX. +FIXME? It would be nice if this could support a timeout, but probably a feature + for later. +[clinic start generated code]*/ + +static int +_io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate, + Py_ssize_t limit) +/*[clinic end generated code: output=71dcfcf7e9a50527 input=9bce10ea48db6415]*/ +{ + if (check_closed(self)) { + return -1; + } + if (check_exports(self)) { + return -1; + } + /* Cap all reads to PY_SSIZE_T_MAX */ + Py_ssize_t cap_size = Py_MIN(Py_MAX(limit, 0), PY_SSIZE_T_MAX); + assert(cap_size > 0); + + /* Try and get estimated_size in a single read. */ + Py_ssize_t read_size = DEFAULT_BUFFER_SIZE; + if (estimate > 0) { + /* In order to detect end of file, need a read() of at + least 1 byte which returns size 0. Oversize the buffer + by 1 byte so the I/O can be completed with two read() + calls (one for all data, one for EOF) without needing + to resize the buffer. */ + read_size = estimate + ((estimate <= PY_SSIZE_T_MAX - 1) ? 1 : 0); + } else if (estimate == 0 || cap_size < STACK_BUFER_SIZE) { + /* A number of things in the normal path expect no data, use a small + temp buffer for those, only expanding buffer if absolutely needed. */ + Py_ssize_t result = _bytesio_readfrom_small_fast(self, file, &cap_size); + if (result != 2) { + return result; + } + } + + /* Never read more than limit. */ + read_size = Py_MIN(read_size, cap_size); + assert(read_size > 0); + + Py_ssize_t current_size = PyBytes_GET_SIZE(self->buf); + if (PY_SSIZE_T_MAX - read_size - current_size > 0) + current_size += read_size; + else { + current_size = PY_SSIZE_T_MAX; + } + if (_PyBytes_Resize(&self->buf, current_size)) { + return -1; + } + Py_ssize_t bytes_read = 0; + Py_ssize_t found_eof = 0; + while (true) { + /* Expand buffer if needed. */ + if (self->string_size >= current_size) { + Py_ssize_t target_size = _bytesio_new_buffersize(current_size); + if (target_size > PY_SSIZE_T_MAX || target_size <= 0) { + PyErr_SetString(PyExc_OverflowError, + "unbounded read returned more bytes " + "than a Python bytes object can hold"); + return -1; + } + if (_PyBytes_Resize(&self->buf, target_size)) { + return -1; + } + current_size = target_size; + read_size = target_size - current_size; + } + // DEBUG: printf("cs: %zd, ss: %zd, cap: %zd, read: %zd\n", current_size, self->string_size, cap_size, bytes_read); + read_size = Py_MIN(current_size - self->string_size, cap_size - bytes_read); + assert(read_size > 0); // Should always be reading some bytes. + assert(self->string_size + read_size <= current_size); + Py_ssize_t result = _Py_read(file, + PyBytes_AS_STRING(self->buf) + self->string_size, + read_size); + if (result == -1) { + // Blocking -> early exit without error. + if (errno == EAGAIN) { + PyErr_Clear(); + break; + } + return -1; + } + // Found EOF. + if (result == 0) { + found_eof = 1; + break; + } + assert(result >= 0); // Should have got bytes + self->string_size += result; + bytes_read += result; + assert(bytes_read <= cap_size); // Shold + if (bytes_read >= cap_size) { + found_eof = 0; + break; + } + } + // FIXME? There could be quite a bit of space between current_size and + // self->string_size, should this downsize then? + // + // yea: Save excess memory + // nay: Efficient pre-allocated buffer reuse if long lived, getting out the + // bytes() will do anyways + return found_eof; +} + /*[clinic input] _io.BytesIO.readline size: Py_ssize_t(accept={int, NoneType}) = -1 @@ -1027,6 +1229,7 @@ static struct PyMethodDef bytesio_methods[] = { _IO_BYTESIO_WRITE_METHODDEF _IO_BYTESIO_WRITELINES_METHODDEF _IO_BYTESIO_READ1_METHODDEF + _IO_BYTESIO_READFROM_METHODDEF _IO_BYTESIO_READINTO_METHODDEF _IO_BYTESIO_READLINE_METHODDEF _IO_BYTESIO_READLINES_METHODDEF diff --git a/Modules/_io/clinic/bytesio.c.h b/Modules/_io/clinic/bytesio.c.h index 5528df952c33fb..797fdf1027463b 100644 --- a/Modules/_io/clinic/bytesio.c.h +++ b/Modules/_io/clinic/bytesio.c.h @@ -233,6 +233,107 @@ _io_BytesIO_read1(PyObject *self, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(_io_BytesIO_readfrom__doc__, +"readfrom($self, file, /, *, estimate=-1, limit=-1)\n" +"--\n" +"\n" +"Efficiently read from the provided file and return True if hit end of file.\n" +"\n" +"Returns True if and only if a read into a non-zero length buffer returns 0\n" +"bytes. On most systems this indicates end of file / stream.\n" +"\n" +"FIXME?: Allow fileobj that provides readinto.?\n" +"FIXME?:Allow fileobj that only has read?\n" +"\n" +"If a readinto call raises NonBlockingError or returns None, data returned to\n" +"that point will be stored in buffer, and will return False. For other exceptions\n" +"while reading, as much data as possible will be in the buffer.\n" +"\n" +"FIXME: BlockingIOError contains data from partial reads. Append it.\n" +" -> Include test that no data is lost w/ multiple repeated blocks\n" +" (There is one already in tests, make sure this is exercised and passes\n" +" it)\n" +"FIXME: Does this need to document that all reads are Limited to PY_SSIZE_T_MAX.\n" +"FIXME? It would be nice if this could support a timeout, but probably a feature\n" +" for later."); + +#define _IO_BYTESIO_READFROM_METHODDEF \ + {"readfrom", _PyCFunction_CAST(_io_BytesIO_readfrom), METH_FASTCALL|METH_KEYWORDS, _io_BytesIO_readfrom__doc__}, + +static int +_io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate, + Py_ssize_t limit); + +static PyObject * +_io_BytesIO_readfrom(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(estimate), &_Py_ID(limit), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "estimate", "limit", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "readfrom", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + int file; + Py_ssize_t estimate = -1; + Py_ssize_t limit = -1; + int _return_value; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + file = PyLong_AsInt(args[0]); + if (file == -1 && PyErr_Occurred()) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + if (!_Py_convert_optional_to_ssize_t(args[1], &estimate)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (!_Py_convert_optional_to_ssize_t(args[2], &limit)) { + goto exit; + } +skip_optional_kwonly: + _return_value = _io_BytesIO_readfrom_impl((bytesio *)self, file, estimate, limit); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(_io_BytesIO_readline__doc__, "readline($self, size=-1, /)\n" "--\n" @@ -535,4 +636,4 @@ _io_BytesIO___init__(PyObject *self, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=8a5e153bc7584b55 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=bab5a4081d518e22 input=a9049054013a1b77]*/ From fd457a80a37eb1a4d938d7194b2e2dac045d0095 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sun, 9 Feb 2025 17:40:58 -0800 Subject: [PATCH 2/8] Working --- Lib/_pyio.py | 96 +++++++++++++++++++------------------------ Modules/_io/bytesio.c | 59 +++++++++++++++----------- 2 files changed, 79 insertions(+), 76 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 4ebed101d688e8..a3016b9d07d760 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -940,68 +940,60 @@ def readfrom(self, file, /, *, estimate=None, limit=None): # I/O can be completed with two read() calls (one for all data, one # for EOF) without needing to resize the buffer. # FIXME(cmaloney): This should probably be a memoryview.... + target_read = None if estimate is not None: - estimate = int(estimate) + 1 + target_read = int(estimate) + 1 + else: + target_read = DEFAULT_BUFFER_SIZE # Cap to limit if limit is not None: limit = int(limit) - if limit <= 0: + if limit == 0: + return False + if limit < 0: raise ValueError(f"limit must be larger than 0, got {limit}") - if target_read is not None: - target_read = min(target_read, limit) + # Expand buffer to get target read in one read when possible. + if limit is not None: + target_read = min(target_read, limit) - if self._pos - len(self._buffer) < estimate: + # Expand so target read definitely fits. + if len(self._buffer) < target_read + self._pos: self._buffer.resize(self._pos + target_read) - # FIXME(cmaloney): Expand buffer if needed + found_eof = False start_pos = self._pos try: - while True: - bytes_read = self._pos - start_pos - if limit is not None and limit <= bytes_read: - return False - - if target_read <= 0: - # FIXME(cmaloney): Check this matces - self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read)) - - if limit is not None and bytes_read >= limit: - return False - - # Make sure there is space for the read. - if target_read : - - - # Cap target read - # Hit cap, not EOF. - bytes_read = self._pos - start_pos - if bytes_read >= cap: - return False - - read_size = len(self._buffer) - self._pos - - # Calculate next read size. - if self._pos >= len(self._buffer): - self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read)) + while n := os.readinto(file, memoryview(self._buffer)[self._pos:]): + self._pos += n + # Expand buffer if needed. + if len(self._buffer) - self._pos <= 0: + bytes_read = self._pos - start_pos + target_read = _new_buffersize(bytes_read) + + # Keep buffer size <= limit, so only need to check against + # limit when resizing. + if limit is not None: + remaining = limit - bytes_read + if remaining <= 0: + assert remaining == 0, "should never pass limit" + break + target_read = min(remaining, target_read) + + self._buffer.resize(target_read + len(self._buffer)) - if read_size <= 0: - # Fill remaining buffer, but never read more than cap. - read_size = len(self._buffer) - self._pos - read_size = min(start_pos - self, cap - bytes_read) + else: + assert len(self._buffer) - self._pos >= 1, \ + "os.readinto buffer size 0 will result in erroneous EOF / returns 0" + found_eof = True - n = os.readinto(file, memoryview(self._buffer)[self._pos:]) - self._pos += n - bytes_read += n - read_size -= n - if read_size <= 0: - read_size = _new_buffersize(bytes_read) - assert len(result) - bytes_read >= 1, \ - "os.readinto buffer size 0 will result in erroneous EOF / returns 0" except BlockingIOError: - if not bytes_read: - return None + pass + + # Buffer must be + self._buffer.resize(self._pos) + return found_eof def write(self, b): if self.closed: @@ -1754,12 +1746,10 @@ def readall(self): pass bio = BytesIO() - try: - bio.readfrom(self._fd, estimate=estimate) - return bio.getvalue() - except BlockingIOError: - result = bio.getvalue() - return result if result else None + found_eof = bio.readfrom(self._fd, estimate=estimate) + result = bio.getvalue() + # No limit in readfrom, so not finding eof indicates blocked. + return result if result or found_eof else None def readinto(self, buffer): diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index ae076d1cec7b05..234594c69cc0fa 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -494,10 +494,10 @@ read loop if more than one buffer of data. -1 == error, 0 == hit cap or blocked, exit, 1 == hit eof / True return, 2 == read more */ -static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *cap_size) { - assert(*cap_size > 0 ** "Must attempt to read at least one byte."); +static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *limit) { + assert(*limit > 0 ** "Must attempt to read at least one byte."); char local_buffer[STACK_BUFER_SIZE]; - Py_ssize_t read_size = Py_MIN(STACK_BUFER_SIZE, *cap_size); + Py_ssize_t read_size = Py_MIN(STACK_BUFER_SIZE, *limit); Py_ssize_t result = _Py_read(fd, local_buffer, read_size); /* Hit EOF in a single read, return True. */ @@ -528,10 +528,10 @@ static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *cap_s return -1; } /* Hit cap, nothing left to do. */ - if (result == *cap_size) { + if (result == *limit) { return 0; } - *cap_size -= result; + *limit -= result; return 2; } @@ -576,9 +576,15 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate, if (check_exports(self)) { return -1; } - /* Cap all reads to PY_SSIZE_T_MAX */ - Py_ssize_t cap_size = Py_MIN(Py_MAX(limit, 0), PY_SSIZE_T_MAX); - assert(cap_size > 0); + /* Limit all reads to PY_SSIZE_T_MAX */ + if (limit < 0) { + limit = PY_SSIZE_T_MAX; + } else if (limit == 0) { + // Limit == 0. no read. + // FIXME(cmaloney): Should this guarantee at least one read? (os.readinto technically accepts 0 length...) + return 0; + } + assert(limit > 0); /* Try and get estimated_size in a single read. */ Py_ssize_t read_size = DEFAULT_BUFFER_SIZE; @@ -589,17 +595,17 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate, calls (one for all data, one for EOF) without needing to resize the buffer. */ read_size = estimate + ((estimate <= PY_SSIZE_T_MAX - 1) ? 1 : 0); - } else if (estimate == 0 || cap_size < STACK_BUFER_SIZE) { + } else if (estimate == 0 || limit < STACK_BUFER_SIZE) { /* A number of things in the normal path expect no data, use a small temp buffer for those, only expanding buffer if absolutely needed. */ - Py_ssize_t result = _bytesio_readfrom_small_fast(self, file, &cap_size); + Py_ssize_t result = _bytesio_readfrom_small_fast(self, file, &limit); if (result != 2) { return result; } } /* Never read more than limit. */ - read_size = Py_MIN(read_size, cap_size); + read_size = Py_MIN(read_size, limit); assert(read_size > 0); Py_ssize_t current_size = PyBytes_GET_SIZE(self->buf); @@ -616,21 +622,28 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate, while (true) { /* Expand buffer if needed. */ if (self->string_size >= current_size) { - Py_ssize_t target_size = _bytesio_new_buffersize(current_size); - if (target_size > PY_SSIZE_T_MAX || target_size <= 0) { + if (current_size >= PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, - "unbounded read returned more bytes " - "than a Python bytes object can hold"); - return -1; + "unbounded read returned more bytes " + "than a Python bytes object can hold"); + } - if (_PyBytes_Resize(&self->buf, target_size)) { + Py_ssize_t target_read = _bytesio_new_buffersize(bytes_read); + /* Never read more than limit bytes_read. */ + target_read = Py_MIN(target_read, limit - bytes_read); + + /* Buffer can't get larger than PY_SSIZE_T_MAX */ + if (PY_SSIZE_T_MAX - current_size < target_read) { + target_read = PY_SSIZE_T_MAX - current_size; + } + + current_size += target_read; + if (_PyBytes_Resize(&self->buf, current_size)) { return -1; } - current_size = target_size; - read_size = target_size - current_size; } - // DEBUG: printf("cs: %zd, ss: %zd, cap: %zd, read: %zd\n", current_size, self->string_size, cap_size, bytes_read); - read_size = Py_MIN(current_size - self->string_size, cap_size - bytes_read); + // DEBUG: printf("cs: %zd, ss: %zd, limit: %zd, read: %zd\n", current_size, self->string_size, limit, bytes_read); + read_size = Py_MIN(current_size - self->string_size, limit - bytes_read); assert(read_size > 0); // Should always be reading some bytes. assert(self->string_size + read_size <= current_size); Py_ssize_t result = _Py_read(file, @@ -652,8 +665,8 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate, assert(result >= 0); // Should have got bytes self->string_size += result; bytes_read += result; - assert(bytes_read <= cap_size); // Shold - if (bytes_read >= cap_size) { + assert(bytes_read <= limit); // Shold + if (bytes_read >= limit) { found_eof = 0; break; } From 64abc8484be76cd899339e01cc7624415ea44c0e Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sun, 9 Feb 2025 18:41:06 -0800 Subject: [PATCH 3/8] WIP: Moving towards FileIO.readall using BytesIO.readfrom --- Lib/_pyio.py | 2 +- Modules/_io/bytesio.c | 1 + Modules/_io/fileio.c | 106 ++++++++++++++---------------------------- 3 files changed, 36 insertions(+), 73 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index a3016b9d07d760..fa896890dffe47 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -949,7 +949,7 @@ def readfrom(self, file, /, *, estimate=None, limit=None): # Cap to limit if limit is not None: limit = int(limit) - if limit == 0: + if limit == 0: # Nothing to read. return False if limit < 0: raise ValueError(f"limit must be larger than 0, got {limit}") diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 234594c69cc0fa..571204f4f2bbef 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -570,6 +570,7 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate, Py_ssize_t limit) /*[clinic end generated code: output=71dcfcf7e9a50527 input=9bce10ea48db6415]*/ { + /* FIXME: Cap to _PY_READ_MAX */ if (check_closed(self)) { return -1; } diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 89f1cfe6b20935..a4e7ab7c7d7ca3 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -735,40 +735,15 @@ static PyObject * _io_FileIO_readall_impl(fileio *self) /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/ { - Py_off_t pos, end; - PyObject *result; - Py_ssize_t bytes_read = 0; - Py_ssize_t n; - size_t bufsize; + PyObject* estimate_obj = Py_None; + PyObject* result = NULL; if (self->fd < 0) { return err_closed(); } - if (self->stat_atopen != NULL && self->stat_atopen->st_size < _PY_READ_MAX) { - end = (Py_off_t)self->stat_atopen->st_size; - } - else { - end = -1; - } - if (end <= 0) { - /* Use a default size and resize as needed. */ - bufsize = SMALLCHUNK; - } - else { - /* This is probably a real file. */ - if (end > _PY_READ_MAX - 1) { - bufsize = _PY_READ_MAX; - } - else { - /* In order to detect end of file, need a read() of at - least 1 byte which returns size 0. Oversize the buffer - by 1 byte so the I/O can be completed with two read() - calls (one for all data, one for EOF) without needing - to resize the buffer. */ - bufsize = (size_t)end + 1; - } - + if (self->stat_atopen != NULL && self->stat_atopen->st_size >= 0) { + Py_ssize_t estimate = self->stat_atopen->st_size; /* While a lot of code does open().read() to get the whole contents of a file it is possible a caller seeks/reads a ways into the file then calls readall() to get the rest, which would result in allocating @@ -785,58 +760,45 @@ _io_FileIO_readall_impl(fileio *self) _Py_END_SUPPRESS_IPH Py_END_ALLOW_THREADS - if (end >= pos && pos >= 0 && (end - pos) < (_PY_READ_MAX - 1)) { - bufsize = (size_t)(end - pos) + 1; + if (estimate >= pos) { + estimate -= pos; } } + estimate_obj = PyLong_FromSsize_t(estimate); + if(!estimate_obj) { + return NULL; + } } - - result = PyBytes_FromStringAndSize(NULL, bufsize); - if (result == NULL) + /* Use BytesIO.readfrom(fd, estimate=estimate) */ + PyObject *bytesio_class = PyImport_ImportModuleAttrString("io", "BytesIO"); + if (!bytesio_class) { return NULL; + } + PyObject *bio = _Py_CallNoArgs(bytesio_class); + Py_DECREF(bytesioclass); + if (!bio) { + return NULL; + } - while (1) { - if (bytes_read >= (Py_ssize_t)bufsize) { - bufsize = new_buffersize(self, bytes_read); - if (bufsize > PY_SSIZE_T_MAX || bufsize <= 0) { - PyErr_SetString(PyExc_OverflowError, - "unbounded read returned more bytes " - "than a Python bytes object can hold"); - Py_DECREF(result); - return NULL; - } - - if (PyBytes_GET_SIZE(result) < (Py_ssize_t)bufsize) { - if (_PyBytes_Resize(&result, bufsize) < 0) - return NULL; - } - } - - n = _Py_read(self->fd, - PyBytes_AS_STRING(result) + bytes_read, - bufsize - bytes_read); - - if (n == 0) - break; - if (n == -1) { - if (errno == EAGAIN) { - PyErr_Clear(); - if (bytes_read > 0) - break; - Py_DECREF(result); - Py_RETURN_NONE; - } - Py_DECREF(result); - return NULL; - } - bytes_read += n; + // FIXME: self._fd, estimate=estimate + // self->fd, estimate=estimate, limit=_PY_READ_MAX + PyObject *args[] = {estimate_obj, } + PyObject *found_eof = PyObject_VectorcallMethod(bytesio_class, "readfrom"); + if (!found_eof) { + Py_DCREF(bytesio_class); + return NULL; + } + PyObject *result = PyObject_CallMethodNoArgs(bytesio_class, &_Py_ID(getvalue)); + if (!getvalue) { + return NULL; } - if (PyBytes_GET_SIZE(result) > bytes_read) { - if (_PyBytes_Resize(&result, bytes_read) < 0) - return NULL; + Py_DECREF(bytesio_class); + if (!bio) { + return NULL; } + _PyObject_Call(bio, ) return result; } From 5f6bee9e6bc756012d079a75ac3ee0af2a5577d3 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 12 Feb 2025 11:32:02 -0800 Subject: [PATCH 4/8] WIP: FileIO.readall -> BytesIO._readfrom --- Modules/_io/fileio.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index a4e7ab7c7d7ca3..da4e1330997da3 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -735,6 +735,7 @@ static PyObject * _io_FileIO_readall_impl(fileio *self) /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/ { + Py_ssize_t pos = 0; PyObject* estimate_obj = Py_None; PyObject* result = NULL; @@ -749,7 +750,7 @@ _io_FileIO_readall_impl(fileio *self) then calls readall() to get the rest, which would result in allocating more than required. Guard against that for larger files where we expect the I/O time to dominate anyways while keeping small files fast. */ - if (bufsize > LARGE_BUFFER_CUTOFF_SIZE) { + if (estimate > LARGE_BUFFER_CUTOFF_SIZE) { Py_BEGIN_ALLOW_THREADS _Py_BEGIN_SUPPRESS_IPH #ifdef MS_WINDOWS @@ -770,6 +771,13 @@ _io_FileIO_readall_impl(fileio *self) } } + /* + bio = io.BytesIO(); + found_eof = bio.readfrom(self->fd, estimate=estimate) + result = bio.getvalue() + return result if result or found_eof else None + */ + /* Use BytesIO.readfrom(fd, estimate=estimate) */ PyObject *bytesio_class = PyImport_ImportModuleAttrString("io", "BytesIO"); if (!bytesio_class) { From 258025dff48ba9ef3f401ba8fb0cede9dca31ced Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 12 Feb 2025 16:33:20 -0800 Subject: [PATCH 5/8] io.FileIO.readall working via io.BytesIO.readall --- Modules/_io/fileio.c | 103 ++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 45 deletions(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index da4e1330997da3..94676d5e339192 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -703,25 +703,6 @@ _io_FileIO_readinto_impl(fileio *self, PyTypeObject *cls, Py_buffer *buffer) return PyLong_FromSsize_t(n); } -static size_t -new_buffersize(fileio *self, size_t currentsize) -{ - size_t addend; - - /* Expand the buffer by an amount proportional to the current size, - giving us amortized linear-time behavior. For bigger sizes, use a - less-than-double growth factor to avoid excessive allocation. */ - assert(currentsize <= PY_SSIZE_T_MAX); - if (currentsize > LARGE_BUFFER_CUTOFF_SIZE) - addend = currentsize >> 3; - else - addend = 256 + currentsize; - if (addend < SMALLCHUNK) - /* Avoid tiny read() calls. */ - addend = SMALLCHUNK; - return addend + currentsize; -} - /*[clinic input] _io.FileIO.readall @@ -737,7 +718,11 @@ _io_FileIO_readall_impl(fileio *self) { Py_ssize_t pos = 0; PyObject* estimate_obj = Py_None; - PyObject* result = NULL; + PyObject *args[3] = {NULL, NULL, NULL}; + PyObject *fn_name = NULL; + PyObject *keyword = NULL; + PyObject *result = NULL; + PyObject *found_eof = NULL; if (self->fd < 0) { return err_closed(); @@ -771,42 +756,70 @@ _io_FileIO_readall_impl(fileio *self) } } - /* - bio = io.BytesIO(); - found_eof = bio.readfrom(self->fd, estimate=estimate) - result = bio.getvalue() - return result if result or found_eof else None - */ - - /* Use BytesIO.readfrom(fd, estimate=estimate) */ - PyObject *bytesio_class = PyImport_ImportModuleAttrString("io", "BytesIO"); + /* bio = io.BytesIO(); + found_eof = bio.readfrom(self->fd, estimate=estimate) */ + PyObject *bytesio_class = PyImport_ImportModuleAttrString("_io", "BytesIO"); if (!bytesio_class) { + Py_DECREF(estimate_obj); return NULL; } - PyObject *bio = _Py_CallNoArgs(bytesio_class); - Py_DECREF(bytesioclass); - if (!bio) { + args[2] = estimate_obj; + estimate_obj = NULL; + + args[0] = PyObject_CallNoArgs(bytesio_class); + Py_DECREF(bytesio_class); + bytesio_class = NULL; + if (!args[0]) { + Py_DECREF(estimate_obj); return NULL; } - // FIXME: self._fd, estimate=estimate - // self->fd, estimate=estimate, limit=_PY_READ_MAX - PyObject *args[] = {estimate_obj, } - PyObject *found_eof = PyObject_VectorcallMethod(bytesio_class, "readfrom"); + args[1] = PyLong_FromLong(self->fd); + if(!args[1]) { + goto leave; + } + fn_name = PyUnicode_InternFromString("readfrom"); + if (!fn_name) { + goto leave; + } + keyword = Py_BuildValue("(s)", "estimate"); + if (!keyword) { + goto leave; + } + found_eof = PyObject_VectorcallMethod( + fn_name, + args, + 2 | PY_VECTORCALL_ARGUMENTS_OFFSET, + keyword + ); if (!found_eof) { - Py_DCREF(bytesio_class); - return NULL; + goto leave; } - PyObject *result = PyObject_CallMethodNoArgs(bytesio_class, &_Py_ID(getvalue)); - if (!getvalue) { - return NULL; + + /* result = bio.getvalue() + return result if result or found_eof else None */ + Py_DECREF(keyword); + keyword = PyUnicode_InternFromString("getvalue"); + if (!keyword) { + goto leave; + } + result = PyObject_CallMethodNoArgs(args[0], keyword); + if (!result) { + goto leave; } - Py_DECREF(bytesio_class); - if (!bio) { - return NULL; + if (!PyBool_Check(found_eof) && !PyBool_Check(result)) { + Py_DECREF(result); + result = Py_None; } - _PyObject_Call(bio, ) + +leave: + Py_XDECREF(args[0]); + Py_XDECREF(args[1]); + Py_XDECREF(args[2]); + Py_XDECREF(fn_name); + Py_XDECREF(keyword); + Py_XDECREF(found_eof); return result; } From 4b3664f91e1a6e2ea4b0ba9857f6a98bb88a43cf Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 12 Feb 2025 16:58:56 -0800 Subject: [PATCH 6/8] Non-blocking readfrom working --- Lib/_pyio.py | 2 +- Modules/_io/fileio.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index fa896890dffe47..393720d69d87a9 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -991,7 +991,7 @@ def readfrom(self, file, /, *, estimate=None, limit=None): except BlockingIOError: pass - # Buffer must be + # Remove all excess bytes. self._buffer.resize(self._pos) return found_eof diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 94676d5e339192..80c9649598a800 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -716,7 +716,6 @@ static PyObject * _io_FileIO_readall_impl(fileio *self) /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/ { - Py_ssize_t pos = 0; PyObject* estimate_obj = Py_None; PyObject *args[3] = {NULL, NULL, NULL}; PyObject *fn_name = NULL; @@ -729,6 +728,7 @@ _io_FileIO_readall_impl(fileio *self) } if (self->stat_atopen != NULL && self->stat_atopen->st_size >= 0) { + Py_ssize_t pos = 0; Py_ssize_t estimate = self->stat_atopen->st_size; /* While a lot of code does open().read() to get the whole contents of a file it is possible a caller seeks/reads a ways into the file @@ -808,7 +808,8 @@ _io_FileIO_readall_impl(fileio *self) goto leave; } - if (!PyBool_Check(found_eof) && !PyBool_Check(result)) { + /* Read was blocked (didn't get to end, and didn't find data) */ + if (!PyObject_IsTrue(result) && !PyObject_IsTrue(found_eof)) { Py_DECREF(result); result = Py_None; } From b3ca823715b3bd8e3b50ea9d93d94b4ccc5ebe28 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Wed, 12 Feb 2025 17:55:43 -0800 Subject: [PATCH 7/8] WIP: readfrom for general file objects with readinto, read --- Lib/_compression.py | 12 ++++-------- Lib/_pyio.py | 12 +++++++++++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/Lib/_compression.py b/Lib/_compression.py index e8b70aa0a3e680..d3530d32cf6d3b 100644 --- a/Lib/_compression.py +++ b/Lib/_compression.py @@ -111,14 +111,10 @@ def read(self, size=-1): return data def readall(self): - chunks = [] - # sys.maxsize means the max length of output buffer is unlimited, - # so that the whole input buffer can be decompressed within one - # .decompress() call. - while data := self.read(sys.maxsize): - chunks.append(data) - - return b"".join(chunks) + # FIXME(cmaloney): non blocking support? + bio = io.BytesIO() + bio.readfrom(self) + return bio.getvalue() # Rewind the file to the beginning of the data stream. def _rewind(self): diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 393720d69d87a9..c0a1553a0a7640 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -962,10 +962,20 @@ def readfrom(self, file, /, *, estimate=None, limit=None): if len(self._buffer) < target_read + self._pos: self._buffer.resize(self._pos + target_read) + # File descriptor + if isinstance(file, int): + read_fn = lambda: os.readinto(file, memoryview(self._buffer)[self._pos:]) + elif file_readinto := getattr(file, "readinto", None): + read_fn = lambda: file_readinto(memoryview(self._buffer)[self._pos:]) + elif file_read := getattr(file, "read", None): + def read_fn(): + data = file_read(len(self._buffer) - self._pos) + self._buffer[self._pos:self._pos + len(data)] = data + found_eof = False start_pos = self._pos try: - while n := os.readinto(file, memoryview(self._buffer)[self._pos:]): + while n := read_fn(): self._pos += n # Expand buffer if needed. if len(self._buffer) - self._pos <= 0: From deb12cc7af2948c83885e7d8c3c34dd1001d6a23 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 13 Feb 2025 13:01:55 -0800 Subject: [PATCH 8/8] tweak comments --- Lib/_pyio.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index c0a1553a0a7640..19445848e12f07 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -939,7 +939,6 @@ def readfrom(self, file, /, *, estimate=None, limit=None): # byte which returns size 0. Oversize the buffer by 1 byte so the # I/O can be completed with two read() calls (one for all data, one # for EOF) without needing to resize the buffer. - # FIXME(cmaloney): This should probably be a memoryview.... target_read = None if estimate is not None: target_read = int(estimate) + 1 @@ -954,16 +953,14 @@ def readfrom(self, file, /, *, estimate=None, limit=None): if limit < 0: raise ValueError(f"limit must be larger than 0, got {limit}") - # Expand buffer to get target read in one read when possible. if limit is not None: target_read = min(target_read, limit) - # Expand so target read definitely fits. + # Expand buffer to get target read in one read when possible. if len(self._buffer) < target_read + self._pos: self._buffer.resize(self._pos + target_read) - # File descriptor - if isinstance(file, int): + if isinstance(file, int): # File descriptor read_fn = lambda: os.readinto(file, memoryview(self._buffer)[self._pos:]) elif file_readinto := getattr(file, "readinto", None): read_fn = lambda: file_readinto(memoryview(self._buffer)[self._pos:])