
Commit 78c4de0

Reduce system calls in full-file readall case
This reduces the system call count of a simple program[0] that reads all the `.rst` files in Doc by over 10% (5706 -> 4734 system calls on my Linux system, 5813 -> 4875 on my macOS). It reduces the number of `fstat()` calls in all cases and the number of seek calls most of the time.

Previously, stat was always called twice: once at open() (to error out early on directories), and a second time to get the size of the file so the whole file could be read in a single read. Now the size from the first call is cached.

The code keeps an optimization that, if the user had previously read a lot of data, subtracts the current position from the number of bytes to read. That is somewhat expensive, so it is only done for larger files; otherwise we just try to read the extra bytes and resize the PyBytes as needed.

I built a little test program to validate the behavior and the assumptions around relative costs, then ran it under `strace` to get a log of the system calls. Full samples below[1]. After the changes, this is everything in one `filename.read_text()`:

```
openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343
read(3, "", 1) = 0
close(3) = 0
```

This does make some tradeoffs:

1. If the file size changes between open() and readall(), this will still get all the data but may need more read calls.
2. I experimented with avoiding the stat and the cached result for small files in general, but on my dev workstation at least that tended to reduce performance compared to using the fstat().

[0]
```python3
from pathlib import Path

nlines = []
for filename in Path("cpython/Doc").glob("**/*.rst"):
    nlines.append(len(filename.read_text()))
```

[1] Before, small file:
```
openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
ioctl(3, TCGETS, 0x7ffe52525930) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
lseek(3, 0, SEEK_CUR) = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343
read(3, "", 1) = 0
close(3) = 0
```

After, small file:
```
openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343
read(3, "", 1) = 0
close(3) = 0
```

Before, large file:
```
openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0
ioctl(3, TCGETS, 0x7ffe52525930) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
lseek(3, 0, SEEK_CUR) = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0
read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104
read(3, "", 1) = 0
close(3) = 0
```

After, large file:
```
openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0
ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
lseek(3, 0, SEEK_CUR) = 0
read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104
read(3, "", 1) = 0
close(3) = 0
```
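For reference, a minimal sketch (not part of the commit) of the Python-level call that lands in this code path: an unsized `read()` on a file opened in binary mode is forwarded by the buffered layer to `FileIO.readall()`, the function changed below. The file path is the same sample file that appears in the traces above.

```python3
# Minimal sketch, not from the commit: exercising the full-file readall path.
# An unsized read() on a binary-mode file object is handed off by the buffered
# layer to FileIO.readall(), which this change optimizes.
with open("cpython/Doc/howto/clinic.rst", "rb") as f:
    data = f.read()   # per the strace sample above: one read of size+1, then one EOF read
print(len(data))      # 343 bytes for this file at the time of the trace
```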
1 parent 89f7208 commit 78c4de0

File tree

1 file changed (+35 lines, -24 lines)

Modules/_io/fileio.c

Lines changed: 35 additions & 24 deletions
```diff
@@ -72,6 +72,7 @@ typedef struct {
     unsigned int closefd : 1;
     char finalizing;
     unsigned int blksize;
+    Py_off_t size_estimated;
     PyObject *weakreflist;
     PyObject *dict;
 } fileio;
@@ -196,6 +197,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         self->appending = 0;
         self->seekable = -1;
         self->blksize = 0;
+        self->size_estimated = -1;
         self->closefd = 1;
         self->weakreflist = NULL;
     }
@@ -482,6 +484,9 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
         if (fdfstat.st_blksize > 1)
             self->blksize = fdfstat.st_blksize;
 #endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
+        if (fdfstat.st_size < PY_SSIZE_T_MAX) {
+            self->size_estimated = (Py_off_t)fdfstat.st_size;
+        }
     }
 
 #if defined(MS_WINDOWS) || defined(__CYGWIN__)
@@ -707,41 +712,48 @@ static PyObject *
 _io_FileIO_readall_impl(fileio *self)
 /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/
 {
-    struct _Py_stat_struct status;
     Py_off_t pos, end;
     PyObject *result;
     Py_ssize_t bytes_read = 0;
     Py_ssize_t n;
     size_t bufsize;
-    int fstat_result;
 
-    if (self->fd < 0)
+    if (self->fd < 0) {
         return err_closed();
+    }
 
-    Py_BEGIN_ALLOW_THREADS
-    _Py_BEGIN_SUPPRESS_IPH
+    end = self->size_estimated;
+    if (end <= 0) {
+        /* Use a default size and resize as needed. */
+        bufsize = SMALLCHUNK;
+    }
+    else {
+        /* This is probably a real file, so we try to allocate a
+           buffer one byte larger than the rest of the file. If the
+           calculation is right then we should get EOF without having
+           to enlarge the buffer. */
+        bufsize = (size_t)(end) + 1;
+
+        /* While a lot of code does open().read() to get the whole contents
+           of a file it is possible a caller seeks/reads a ways into the file
+           then calls readall() to get the rest, which would result in allocating
+           more than required. Guard against that for larger files where we expect
+           the I/O time to dominate anyways while keeping small files fast. */
+        if (bufsize > 65536) {
+            Py_BEGIN_ALLOW_THREADS
+            _Py_BEGIN_SUPPRESS_IPH
 #ifdef MS_WINDOWS
-    pos = _lseeki64(self->fd, 0L, SEEK_CUR);
+            pos = _lseeki64(self->fd, 0L, SEEK_CUR);
 #else
-    pos = lseek(self->fd, 0L, SEEK_CUR);
+            pos = lseek(self->fd, 0L, SEEK_CUR);
 #endif
-    _Py_END_SUPPRESS_IPH
-    fstat_result = _Py_fstat_noraise(self->fd, &status);
-    Py_END_ALLOW_THREADS
-
-    if (fstat_result == 0)
-        end = status.st_size;
-    else
-        end = (Py_off_t)-1;
+            _Py_END_SUPPRESS_IPH
+            Py_END_ALLOW_THREADS
 
-    if (end > 0 && end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) {
-        /* This is probably a real file, so we try to allocate a
-           buffer one byte larger than the rest of the file. If the
-           calculation is right then we should get EOF without having
-           to enlarge the buffer. */
-        bufsize = (size_t)(end - pos + 1);
-    } else {
-        bufsize = SMALLCHUNK;
+            if (end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) {
+                bufsize = bufsize - pos;
+            }
+        }
     }
 
     result = PyBytes_FromStringAndSize(NULL, bufsize);
@@ -783,7 +795,6 @@ _io_FileIO_readall_impl(fileio *self)
             return NULL;
         }
         bytes_read += n;
-        pos += n;
     }
 
     if (PyBytes_GET_SIZE(result) > bytes_read) {
```
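A hedged usage sketch (not part of the commit) of the seek-then-readall case that the `bufsize > 65536` guard above targets; the file name and size are taken from the strace samples in the commit message, and the offset is illustrative:

```python3
# Illustrative sketch: a caller consumes part of a large file and then reads the rest.
# For files over 64 KiB, readall() performs one lseek(SEEK_CUR) so it can subtract the
# current offset from the cached size instead of allocating a buffer for the whole
# file; smaller files skip the seek and simply resize the resulting bytes object.
with open("cpython/Doc/c-api/typeobj.rst", "rb") as f:
    f.seek(100_000)      # pretend roughly 100 KB was already consumed
    rest = f.read()      # readall() allocates about (size - pos + 1) bytes, not size + 1
# With the file at 133104 bytes, as in the trace, len(rest) would be 33104.
print(len(rest))
```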

0 commit comments