gh-129205: Experiment BytesIO._readfrom() by cmaloney · Pull Request #130098 · python/cpython · GitHub
gh-129205: Experiment BytesIO._readfrom() #130098

Closed
wants to merge 8 commits
Changes from 1 commit
Working
cmaloney committed Feb 11, 2025
commit fd457a80a37eb1a4d938d7194b2e2dac045d0095
96 changes: 43 additions & 53 deletions Lib/_pyio.py
@@ -940,68 +940,60 @@ def readfrom(self, file, /, *, estimate=None, limit=None):
# I/O can be completed with two read() calls (one for all data, one
# for EOF) without needing to resize the buffer.
# FIXME(cmaloney): This should probably be a memoryview....
target_read = None
if estimate is not None:
estimate = int(estimate) + 1
target_read = int(estimate) + 1
else:
target_read = DEFAULT_BUFFER_SIZE
cmaloney (Contributor Author) commented:
nit: max(DEFAULT_BUFFER_SIZE, len(self._buffer) - self._pos) / if there's already lots of space use it.
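A minimal sketch of that suggestion (illustrative only; the helper name is hypothetical, and it mirrors the estimate/else branch of readfrom() in the diff above):

    def _default_target_read(buffer, pos, estimate=None, DEFAULT_BUFFER_SIZE=8192):
        # Review suggestion: when no estimate is given, prefer free space
        # already present in the buffer if it exceeds the default size.
        if estimate is not None:
            return int(estimate) + 1
        return max(DEFAULT_BUFFER_SIZE, len(buffer) - pos)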


# Cap to limit
if limit is not None:
limit = int(limit)
if limit <= 0:
if limit == 0:
return False
if limit < 0:
raise ValueError(f"limit must be larger than 0, got {limit}")

if target_read is not None:
target_read = min(target_read, limit)
# Expand buffer to get target read in one read when possible.
if limit is not None:
target_read = min(target_read, limit)

if self._pos - len(self._buffer) < estimate:
# Expand so target read definitely fits.
if len(self._buffer) < target_read + self._pos:
self._buffer.resize(self._pos + target_read)

# FIXME(cmaloney): Expand buffer if needed
found_eof = False
start_pos = self._pos
try:
while True:
bytes_read = self._pos - start_pos
if limit is not None and limit <= bytes_read:
return False

if target_read <= 0:
# FIXME(cmaloney): Check this matches
self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read))

if limit is not None and bytes_read >= limit:
return False

# Make sure there is space for the read.
if target_read :


# Cap target read
# Hit cap, not EOF.
bytes_read = self._pos - start_pos
if bytes_read >= cap:
return False

read_size = len(self._buffer) - self._pos

# Calculate next read size.
if self._pos >= len(self._buffer):
self._buffer.resize(len(self._buffer) + _new_buffersize(bytes_read))
while n := os.readinto(file, memoryview(self._buffer)[self._pos:]):
self._pos += n
# Expand buffer if needed.
if len(self._buffer) - self._pos <= 0:
bytes_read = self._pos - start_pos
target_read = _new_buffersize(bytes_read)

# Keep buffer size <= limit, so only need to check against
# limit when resizing.
if limit is not None:
remaining = limit - bytes_read
if remaining <= 0:
assert remaining == 0, "should never pass limit"
break
target_read = min(remaining, target_read)

self._buffer.resize(target_read + len(self._buffer))

if read_size <= 0:
# Fill remaining buffer, but never read more than cap.
read_size = len(self._buffer) - self._pos
read_size = min(start_pos - self, cap - bytes_read)
else:
assert len(self._buffer) - self._pos >= 1, \
"os.readinto buffer size 0 will result in erroneous EOF / returns 0"
found_eof = True

n = os.readinto(file, memoryview(self._buffer)[self._pos:])
self._pos += n
bytes_read += n
read_size -= n
if read_size <= 0:
read_size = _new_buffersize(bytes_read)
assert len(result) - bytes_read >= 1, \
"os.readinto buffer size 0 will result in erroneous EOF / returns 0"
except BlockingIOError:
if not bytes_read:
return None
pass

# Buffer must be
self._buffer.resize(self._pos)
return found_eof

def write(self, b):
if self.closed:
@@ -1754,12 +1746,10 @@ def readall(self):
pass

bio = BytesIO()
try:
bio.readfrom(self._fd, estimate=estimate)
return bio.getvalue()
except BlockingIOError:
result = bio.getvalue()
return result if result else None
found_eof = bio.readfrom(self._fd, estimate=estimate)
result = bio.getvalue()
# No limit in readfrom, so not finding eof indicates blocked.
return result if result or found_eof else None


def readinto(self, buffer):
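For context on the readall() change above, a rough usage sketch, assuming the experimental readfrom() behaves as in this commit (truthy return on EOF, falsy when the descriptor would block before EOF); the pipe setup is illustrative only:

    import os
    from io import BytesIO   # assumes the experimental readfrom() from this branch

    r, w = os.pipe()
    os.set_blocking(r, False)
    os.write(w, b"partial")

    bio = BytesIO()
    found_eof = bio.readfrom(r, estimate=7)
    data = bio.getvalue()                  # b"partial"
    # The write end is still open, so readfrom() stopped because the fd
    # would block, not because of EOF; readall() therefore returns the
    # partial data, and would return None only if nothing had been read.
    result = data if data or found_eof else None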
59 changes: 36 additions & 23 deletions Modules/_io/bytesio.c
@@ -494,10 +494,10 @@ read loop if more than one buffer of data.

-1 == error, 0 == hit cap or blocked, exit, 1 == hit eof / True return, 2 == read more
*/
static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *cap_size) {
assert(*cap_size > 0 && "Must attempt to read at least one byte.");
static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *limit) {
assert(*limit > 0 && "Must attempt to read at least one byte.");
char local_buffer[STACK_BUFER_SIZE];
Py_ssize_t read_size = Py_MIN(STACK_BUFER_SIZE, *cap_size);
Py_ssize_t read_size = Py_MIN(STACK_BUFER_SIZE, *limit);
Py_ssize_t result = _Py_read(fd, local_buffer, read_size);

/* Hit EOF in a single read, return True. */
@@ -528,10 +528,10 @@ static int _bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *cap_s
return -1;
}
/* Hit cap, nothing left to do. */
if (result == *cap_size) {
if (result == *limit) {
return 0;
}
*cap_size -= result;
*limit -= result;
return 2;
}

@@ -576,9 +576,15 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
if (check_exports(self)) {
return -1;
}
/* Cap all reads to PY_SSIZE_T_MAX */
Py_ssize_t cap_size = Py_MIN(Py_MAX(limit, 0), PY_SSIZE_T_MAX);
assert(cap_size > 0);
/* Limit all reads to PY_SSIZE_T_MAX */
if (limit < 0) {
limit = PY_SSIZE_T_MAX;
} else if (limit == 0) {
// Limit == 0. no read.
// FIXME(cmaloney): Should this guarantee at least one read? (os.readinto technically accepts 0 length...)
return 0;
}
assert(limit > 0);

/* Try and get estimated_size in a single read. */
Py_ssize_t read_size = DEFAULT_BUFFER_SIZE;
@@ -589,17 +595,17 @@
calls (one for all data, one for EOF) without needing
to resize the buffer. */
read_size = estimate + ((estimate <= PY_SSIZE_T_MAX - 1) ? 1 : 0);
} else if (estimate == 0 || cap_size < STACK_BUFER_SIZE) {
} else if (estimate == 0 || limit < STACK_BUFER_SIZE) {
/* A number of things in the normal path expect no data, use a small
temp buffer for those, only expanding buffer if absolutely needed. */
Py_ssize_t result = _bytesio_readfrom_small_fast(self, file, &cap_size);
Py_ssize_t result = _bytesio_readfrom_small_fast(self, file, &limit);
if (result != 2) {
return result;
}
}

/* Never read more than limit. */
read_size = Py_MIN(read_size, cap_size);
read_size = Py_MIN(read_size, limit);
assert(read_size > 0);

Py_ssize_t current_size = PyBytes_GET_SIZE(self->buf);
@@ -616,21 +622,28 @@
while (true) {
/* Expand buffer if needed. */
if (self->string_size >= current_size) {
Py_ssize_t target_size = _bytesio_new_buffersize(current_size);
if (target_size > PY_SSIZE_T_MAX || target_size <= 0) {
if (current_size >= PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"unbounded read returned more bytes "
"than a Python bytes object can hold");
return -1;
"unbounded read returned more bytes "
"than a Python bytes object can hold");

}
if (_PyBytes_Resize(&self->buf, target_size)) {
Py_ssize_t target_read = _bytesio_new_buffersize(bytes_read);
/* Never read more than limit bytes_read. */
target_read = Py_MIN(target_read, limit - bytes_read);

/* Buffer can't get larger than PY_SSIZE_T_MAX */
if (PY_SSIZE_T_MAX - current_size < target_read) {
target_read = PY_SSIZE_T_MAX - current_size;
}

current_size += target_read;
if (_PyBytes_Resize(&self->buf, current_size)) {
return -1;
}
current_size = target_size;
read_size = target_size - current_size;
}
// DEBUG: printf("cs: %zd, ss: %zd, cap: %zd, read: %zd\n", current_size, self->string_size, cap_size, bytes_read);
read_size = Py_MIN(current_size - self->string_size, cap_size - bytes_read);
// DEBUG: printf("cs: %zd, ss: %zd, limit: %zd, read: %zd\n", current_size, self->string_size, limit, bytes_read);
read_size = Py_MIN(current_size - self->string_size, limit - bytes_read);
assert(read_size > 0); // Should always be reading some bytes.
assert(self->string_size + read_size <= current_size);
Py_ssize_t result = _Py_read(file,
Expand All @@ -652,8 +665,8 @@ _io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
assert(result >= 0); // Should have got bytes
self->string_size += result;
bytes_read += result;
assert(bytes_read <= cap_size); // Should never pass limit
if (bytes_read >= cap_size) {
assert(bytes_read <= limit); // Should never pass limit
if (bytes_read >= limit) {
found_eof = 0;
break;
}
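To summarize the buffer-growth logic in the loop above, a rough Python rendering (a sketch only: _new_buffersize() is a placeholder for the branch's _bytesio_new_buffersize()/_new_buffersize() helpers, and the constants are illustrative):

    DEFAULT_BUFFER_SIZE = 8192     # stand-in for io.DEFAULT_BUFFER_SIZE
    PY_SSIZE_T_MAX = 2**63 - 1     # platform-dependent in C

    def _new_buffersize(bytes_read):
        # Placeholder growth policy; the real one lives in the helpers above.
        return max(DEFAULT_BUFFER_SIZE, bytes_read)

    def next_buffer_size(current_size, bytes_read, limit):
        target_read = _new_buffersize(bytes_read)
        # Never read past the caller-supplied limit...
        target_read = min(target_read, limit - bytes_read)
        # ...and never grow past what a Python bytes object can hold.
        target_read = min(target_read, PY_SSIZE_T_MAX - current_size)
        return current_size + target_read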