From 7cc4a85edecc32dc4cfa477444c974b5cc99e1ee Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Mon, 24 Mar 2025 00:47:23 +1300 Subject: [PATCH 1/9] gh-73069 c1: fix race conditions with os.scandir iterator --- Modules/posixmodule.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index f3ce1fb632226e..7a07bb9007662a 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -16181,6 +16181,7 @@ typedef struct { #ifdef HAVE_FDOPENDIR int fd; #endif + PyMutex mutex; } ScandirIterator; #define ScandirIterator_CAST(op) ((ScandirIterator *)(op)) @@ -16190,18 +16191,21 @@ typedef struct { static int ScandirIterator_is_closed(ScandirIterator *iterator) { - return iterator->handle == INVALID_HANDLE_VALUE; + return _Py_atomic_load_ptr_relaxed(&iterator->handle) == INVALID_HANDLE_VALUE; } static void ScandirIterator_closedir(ScandirIterator *iterator) { + PyMutex_Lock(&iterator->mutex); HANDLE handle = iterator->handle; + iterator->handle = INVALID_HANDLE_VALUE; + PyMutex_Unlock(&iterator->mutex); - if (handle == INVALID_HANDLE_VALUE) + if (handle == INVALID_HANDLE_VALUE) { return; + } - iterator->handle = INVALID_HANDLE_VALUE; Py_BEGIN_ALLOW_THREADS FindClose(handle); Py_END_ALLOW_THREADS @@ -16215,11 +16219,8 @@ ScandirIterator_iternext(PyObject *op) BOOL success; PyObject *entry; - /* Happens if the iterator is iterated twice, or closed explicitly */ - if (iterator->handle == INVALID_HANDLE_VALUE) - return NULL; - - while (1) { + PyMutex_Lock(&iterator->mutex); + while (iterator->handle != INVALID_HANDLE_VALUE) { if (!iterator->first_time) { Py_BEGIN_ALLOW_THREADS success = FindNextFileW(iterator->handle, file_data); @@ -16241,13 +16242,15 @@ ScandirIterator_iternext(PyObject *op) entry = DirEntry_from_find_data(module, &iterator->path, file_data); if (!entry) break; + PyMutex_Unlock(&iterator->mutex); return entry; } /* Loop till we get a non-dot directory or finish iterating */ } - /* Error or no more files */ + /* Already closed, error, or no more files */ + PyMutex_Unlock(&iterator->mutex); ScandirIterator_closedir(iterator); return NULL; } @@ -16257,18 +16260,21 @@ ScandirIterator_iternext(PyObject *op) static int ScandirIterator_is_closed(ScandirIterator *iterator) { - return !iterator->dirp; + return !_Py_atomic_load_ptr_relaxed(&iterator->dirp); } static void ScandirIterator_closedir(ScandirIterator *iterator) { + PyMutex_Lock(&iterator->mutex); DIR *dirp = iterator->dirp; + iterator->dirp = NULL; + PyMutex_Unlock(&iterator->mutex); - if (!dirp) + if (!dirp) { return; + } - iterator->dirp = NULL; Py_BEGIN_ALLOW_THREADS #ifdef HAVE_FDOPENDIR if (iterator->path.fd != -1) @@ -16288,11 +16294,8 @@ ScandirIterator_iternext(PyObject *op) int is_dot; PyObject *entry; - /* Happens if the iterator is iterated twice, or closed explicitly */ - if (!iterator->dirp) - return NULL; - - while (1) { + PyMutex_Lock(&iterator->mutex); + while (iterator->dirp) { errno = 0; Py_BEGIN_ALLOW_THREADS direntp = readdir(iterator->dirp); @@ -16320,13 +16323,15 @@ ScandirIterator_iternext(PyObject *op) ); if (!entry) break; + PyMutex_Unlock(&iterator->mutex); return entry; } /* Loop till we get a non-dot directory or finish iterating */ } - /* Error or no more files */ + /* Already closed, error, or no more files */ + PyMutex_Unlock(&iterator->mutex); ScandirIterator_closedir(iterator); return NULL; } @@ -16459,6 +16464,7 @@ os_scandir_impl(PyObject *module, path_t *path) if (!iterator) return NULL; + iterator->mutex = (PyMutex){0}; #ifdef MS_WINDOWS iterator->handle = INVALID_HANDLE_VALUE; #else From a23f0dd465909da6af008a396c487afeebb0ad7a Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Mon, 24 Mar 2025 11:19:38 +1300 Subject: [PATCH 2/9] gh-73069 c2: fix race conditions with os.scandir direntry stat --- Modules/posixmodule.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 7a07bb9007662a..5554b8c3fd1aab 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -15582,6 +15582,7 @@ typedef struct { ino_t d_ino; int dir_fd; #endif + PyMutex mutex; } DirEntry; #define DirEntry_CAST(op) ((DirEntry *)(op)) @@ -15591,10 +15592,12 @@ DirEntry_dealloc(PyObject *op) { DirEntry *entry = DirEntry_CAST(op); PyTypeObject *tp = Py_TYPE(entry); + PyMutex_Lock(&entry->mutex); Py_XDECREF(entry->name); Py_XDECREF(entry->path); Py_XDECREF(entry->stat); Py_XDECREF(entry->lstat); + PyMutex_Unlock(&entry->mutex); freefunc free_func = PyType_GetSlot(tp, Py_tp_free); free_func(entry); Py_DECREF(tp); @@ -15712,6 +15715,7 @@ DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) static PyObject * DirEntry_get_lstat(PyTypeObject *defining_class, DirEntry *self) { + /* Must be called with self->mutex held */ if (!self->lstat) { PyObject *module = PyType_GetModule(defining_class); #ifdef MS_WINDOWS @@ -15739,12 +15743,17 @@ os_DirEntry_stat_impl(DirEntry *self, PyTypeObject *defining_class, /*[clinic end generated code: output=23f803e19c3e780e input=e816273c4e67ee98]*/ { if (!follow_symlinks) { - return DirEntry_get_lstat(defining_class, self); + PyMutex_Lock(&self->mutex); + PyObject *stat = DirEntry_get_lstat(defining_class, self); + PyMutex_Unlock(&self->mutex); + return stat; } + PyMutex_Lock(&self->mutex); if (!self->stat) { int result = os_DirEntry_is_symlink_impl(self, defining_class); if (result == -1) { + PyMutex_Unlock(&self->mutex); return NULL; } if (result) { @@ -15755,6 +15764,7 @@ os_DirEntry_stat_impl(DirEntry *self, PyTypeObject *defining_class, self->stat = DirEntry_get_lstat(defining_class, self); } } + PyMutex_Unlock(&self->mutex); return Py_XNewRef(self->stat); } @@ -16029,6 +16039,7 @@ DirEntry_from_find_data(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW) entry->stat = NULL; entry->lstat = NULL; entry->got_file_index = 0; + entry->mutex = (PyMutex){0}; entry->name = PyUnicode_FromWideChar(dataW->cFileName, -1); if (!entry->name) @@ -16121,6 +16132,7 @@ DirEntry_from_posix_info(PyObject *module, path_t *path, const char *name, entry->path = NULL; entry->stat = NULL; entry->lstat = NULL; + entry->mutex = (PyMutex){0}; if (path->fd != -1) { entry->dir_fd = path->fd; From 7ef377f9e5f99e7aecb6e68361ecceeb78a41490 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Mon, 24 Mar 2025 15:30:37 +1300 Subject: [PATCH 3/9] NEWS entry for gh-73069 --- .../next/Library/2025-03-24-15-16-20.gh-issue-73069.xKqJPz.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-03-24-15-16-20.gh-issue-73069.xKqJPz.rst diff --git a/Misc/NEWS.d/next/Library/2025-03-24-15-16-20.gh-issue-73069.xKqJPz.rst b/Misc/NEWS.d/next/Library/2025-03-24-15-16-20.gh-issue-73069.xKqJPz.rst new file mode 100644 index 00000000000000..a4ed0ded1d2199 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-24-15-16-20.gh-issue-73069.xKqJPz.rst @@ -0,0 +1 @@ +Fixed some race conditions in os.scandir and associated direntry. From f757a3b8f3ca2d92819e4178efe248e9f8e5d336 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Tue, 25 Mar 2025 12:20:56 +1300 Subject: [PATCH 4/9] Switch to _PyRecursiveMutex from PyMutex It seems we do need to worry about re-entrancy, so switch to using a recursive mutex. Note we can't use a critical section since we need to perform blocking IO while holding the lock, which critical section would implicitly release. --- Modules/posixmodule.c | 44 +++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 5554b8c3fd1aab..d79ac2d17cf051 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -15582,7 +15582,7 @@ typedef struct { ino_t d_ino; int dir_fd; #endif - PyMutex mutex; + _PyRecursiveMutex mutex; } DirEntry; #define DirEntry_CAST(op) ((DirEntry *)(op)) @@ -15592,12 +15592,12 @@ DirEntry_dealloc(PyObject *op) { DirEntry *entry = DirEntry_CAST(op); PyTypeObject *tp = Py_TYPE(entry); - PyMutex_Lock(&entry->mutex); + _PyRecursiveMutex_Lock(&entry->mutex); Py_XDECREF(entry->name); Py_XDECREF(entry->path); Py_XDECREF(entry->stat); Py_XDECREF(entry->lstat); - PyMutex_Unlock(&entry->mutex); + _PyRecursiveMutex_Unlock(&entry->mutex); freefunc free_func = PyType_GetSlot(tp, Py_tp_free); free_func(entry); Py_DECREF(tp); @@ -15743,17 +15743,17 @@ os_DirEntry_stat_impl(DirEntry *self, PyTypeObject *defining_class, /*[clinic end generated code: output=23f803e19c3e780e input=e816273c4e67ee98]*/ { if (!follow_symlinks) { - PyMutex_Lock(&self->mutex); + _PyRecursiveMutex_Lock(&self->mutex); PyObject *stat = DirEntry_get_lstat(defining_class, self); - PyMutex_Unlock(&self->mutex); + _PyRecursiveMutex_Unlock(&self->mutex); return stat; } - PyMutex_Lock(&self->mutex); + _PyRecursiveMutex_Lock(&self->mutex); if (!self->stat) { int result = os_DirEntry_is_symlink_impl(self, defining_class); if (result == -1) { - PyMutex_Unlock(&self->mutex); + _PyRecursiveMutex_Unlock(&self->mutex); return NULL; } if (result) { @@ -15764,7 +15764,7 @@ os_DirEntry_stat_impl(DirEntry *self, PyTypeObject *defining_class, self->stat = DirEntry_get_lstat(defining_class, self); } } - PyMutex_Unlock(&self->mutex); + _PyRecursiveMutex_Unlock(&self->mutex); return Py_XNewRef(self->stat); } @@ -16039,7 +16039,7 @@ DirEntry_from_find_data(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW) entry->stat = NULL; entry->lstat = NULL; entry->got_file_index = 0; - entry->mutex = (PyMutex){0}; + entry->mutex = (_PyRecursiveMutex){0}; entry->name = PyUnicode_FromWideChar(dataW->cFileName, -1); if (!entry->name) @@ -16132,7 +16132,7 @@ DirEntry_from_posix_info(PyObject *module, path_t *path, const char *name, entry->path = NULL; entry->stat = NULL; entry->lstat = NULL; - entry->mutex = (PyMutex){0}; + entry->mutex = (_PyRecursiveMutex){0}; if (path->fd != -1) { entry->dir_fd = path->fd; @@ -16193,7 +16193,7 @@ typedef struct { #ifdef HAVE_FDOPENDIR int fd; #endif - PyMutex mutex; + _PyRecursiveMutex mutex; } ScandirIterator; #define ScandirIterator_CAST(op) ((ScandirIterator *)(op)) @@ -16209,10 +16209,10 @@ ScandirIterator_is_closed(ScandirIterator *iterator) static void ScandirIterator_closedir(ScandirIterator *iterator) { - PyMutex_Lock(&iterator->mutex); + _PyRecursiveMutex_Lock(&iterator->mutex); HANDLE handle = iterator->handle; iterator->handle = INVALID_HANDLE_VALUE; - PyMutex_Unlock(&iterator->mutex); + _PyRecursiveMutex_Unlock(&iterator->mutex); if (handle == INVALID_HANDLE_VALUE) { return; @@ -16231,7 +16231,7 @@ ScandirIterator_iternext(PyObject *op) BOOL success; PyObject *entry; - PyMutex_Lock(&iterator->mutex); + _PyRecursiveMutex_Lock(&iterator->mutex); while (iterator->handle != INVALID_HANDLE_VALUE) { if (!iterator->first_time) { Py_BEGIN_ALLOW_THREADS @@ -16254,7 +16254,7 @@ ScandirIterator_iternext(PyObject *op) entry = DirEntry_from_find_data(module, &iterator->path, file_data); if (!entry) break; - PyMutex_Unlock(&iterator->mutex); + _PyRecursiveMutex_Unlock(&iterator->mutex); return entry; } @@ -16262,7 +16262,7 @@ ScandirIterator_iternext(PyObject *op) } /* Already closed, error, or no more files */ - PyMutex_Unlock(&iterator->mutex); + _PyRecursiveMutex_Unlock(&iterator->mutex); ScandirIterator_closedir(iterator); return NULL; } @@ -16278,10 +16278,10 @@ ScandirIterator_is_closed(ScandirIterator *iterator) static void ScandirIterator_closedir(ScandirIterator *iterator) { - PyMutex_Lock(&iterator->mutex); + _PyRecursiveMutex_Lock(&iterator->mutex); DIR *dirp = iterator->dirp; iterator->dirp = NULL; - PyMutex_Unlock(&iterator->mutex); + _PyRecursiveMutex_Unlock(&iterator->mutex); if (!dirp) { return; @@ -16306,7 +16306,7 @@ ScandirIterator_iternext(PyObject *op) int is_dot; PyObject *entry; - PyMutex_Lock(&iterator->mutex); + _PyRecursiveMutex_Lock(&iterator->mutex); while (iterator->dirp) { errno = 0; Py_BEGIN_ALLOW_THREADS @@ -16335,7 +16335,7 @@ ScandirIterator_iternext(PyObject *op) ); if (!entry) break; - PyMutex_Unlock(&iterator->mutex); + _PyRecursiveMutex_Unlock(&iterator->mutex); return entry; } @@ -16343,7 +16343,7 @@ ScandirIterator_iternext(PyObject *op) } /* Already closed, error, or no more files */ - PyMutex_Unlock(&iterator->mutex); + _PyRecursiveMutex_Unlock(&iterator->mutex); ScandirIterator_closedir(iterator); return NULL; } @@ -16476,7 +16476,7 @@ os_scandir_impl(PyObject *module, path_t *path) if (!iterator) return NULL; - iterator->mutex = (PyMutex){0}; + iterator->mutex = (_PyRecursiveMutex){0}; #ifdef MS_WINDOWS iterator->handle = INVALID_HANDLE_VALUE; #else From 40730d36daed6936deb781a5f66843e86c192595 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Tue, 25 Mar 2025 14:08:13 +1300 Subject: [PATCH 5/9] Refactor so mutex is held for minimal time and not over any code that could be re-entrant. --- Modules/posixmodule.c | 122 ++++++++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 51 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index d79ac2d17cf051..03bbf01b66af2c 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -16193,7 +16193,7 @@ typedef struct { #ifdef HAVE_FDOPENDIR int fd; #endif - _PyRecursiveMutex mutex; + PyMutex mutex; } ScandirIterator; #define ScandirIterator_CAST(op) ((ScandirIterator *)(op)) @@ -16209,10 +16209,10 @@ ScandirIterator_is_closed(ScandirIterator *iterator) static void ScandirIterator_closedir(ScandirIterator *iterator) { - _PyRecursiveMutex_Lock(&iterator->mutex); + PyMutex_Lock(&iterator->mutex); HANDLE handle = iterator->handle; iterator->handle = INVALID_HANDLE_VALUE; - _PyRecursiveMutex_Unlock(&iterator->mutex); + PyMutex_Unlock(&iterator->mutex); if (handle == INVALID_HANDLE_VALUE) { return; @@ -16223,38 +16223,49 @@ ScandirIterator_closedir(ScandirIterator *iterator) Py_END_ALLOW_THREADS } +static BOOL +ScandirIterator_nextdirentry(ScandirIterator *iterator, WIN32_FIND_DATAW *file_data) +{ + BOOL has_error = FALSE; + BOOL has_result = FALSE; + PyMutex_Lock(&iterator->mutex); + if (iterator->handle != INVALID_HANDLE_VALUE) { + if (iterator->first_time) { + has_result = TRUE; + memcpy(file_data, &iterator->file_data, sizeof(WIN32_FIND_DATAW)); + iterator->first_time = 0; + } else { + Py_BEGIN_ALLOW_THREADS + has_result = FindNextFileW(iterator->handle, file_data); + Py_END_ALLOW_THREADS + has_error = !has_result && GetLastError() != ERROR_NO_MORE_FILES; + } + } + PyMutex_Unlock(&iterator->mutex); + + /* Error or no more files */ + if (has_error) + path_error(&iterator->path); + + return has_result; +} + static PyObject * ScandirIterator_iternext(PyObject *op) { ScandirIterator *iterator = ScandirIterator_CAST(op); - WIN32_FIND_DATAW *file_data = &iterator->file_data; - BOOL success; + WIN32_FIND_DATAW file_data; PyObject *entry; - _PyRecursiveMutex_Lock(&iterator->mutex); - while (iterator->handle != INVALID_HANDLE_VALUE) { - if (!iterator->first_time) { - Py_BEGIN_ALLOW_THREADS - success = FindNextFileW(iterator->handle, file_data); - Py_END_ALLOW_THREADS - if (!success) { - /* Error or no more files */ - if (GetLastError() != ERROR_NO_MORE_FILES) - path_error(&iterator->path); - break; - } - } - iterator->first_time = 0; - + while (ScandirIterator_nextdirentry(iterator, &file_data)) { /* Skip over . and .. */ - if (wcscmp(file_data->cFileName, L".") != 0 && - wcscmp(file_data->cFileName, L"..") != 0) + if (wcscmp(file_data.cFileName, L".") != 0 && + wcscmp(file_data.cFileName, L"..") != 0) { PyObject *module = PyType_GetModule(Py_TYPE(iterator)); - entry = DirEntry_from_find_data(module, &iterator->path, file_data); + entry = DirEntry_from_find_data(module, &iterator->path, &file_data); if (!entry) break; - _PyRecursiveMutex_Unlock(&iterator->mutex); return entry; } @@ -16262,7 +16273,6 @@ ScandirIterator_iternext(PyObject *op) } /* Already closed, error, or no more files */ - _PyRecursiveMutex_Unlock(&iterator->mutex); ScandirIterator_closedir(iterator); return NULL; } @@ -16278,10 +16288,10 @@ ScandirIterator_is_closed(ScandirIterator *iterator) static void ScandirIterator_closedir(ScandirIterator *iterator) { - _PyRecursiveMutex_Lock(&iterator->mutex); + PyMutex_Lock(&iterator->mutex); DIR *dirp = iterator->dirp; iterator->dirp = NULL; - _PyRecursiveMutex_Unlock(&iterator->mutex); + PyMutex_Unlock(&iterator->mutex); if (!dirp) { return; @@ -16297,45 +16307,56 @@ ScandirIterator_closedir(ScandirIterator *iterator) return; } +static int +ScandirIterator_nextdirentry(ScandirIterator *iterator, struct dirent *direntp) +{ + int has_error = 0; + struct dirent *result = NULL; + PyMutex_Lock(&iterator->mutex); + if (iterator->dirp) { + errno = 0; + Py_BEGIN_ALLOW_THREADS + result = readdir(iterator->dirp); + Py_END_ALLOW_THREADS + has_error = !result && errno != 0; + } + /* We need to make a copy of the result before releasing the lock */ + if (result) + memcpy(direntp, result, sizeof(struct dirent)); + PyMutex_Unlock(&iterator->mutex); + + /* Error or no more files */ + if (has_error) + path_error(&iterator->path); + + return result != NULL; +} + static PyObject * ScandirIterator_iternext(PyObject *op) { ScandirIterator *iterator = ScandirIterator_CAST(op); - struct dirent *direntp; + struct dirent direntp; Py_ssize_t name_len; int is_dot; PyObject *entry; - _PyRecursiveMutex_Lock(&iterator->mutex); - while (iterator->dirp) { - errno = 0; - Py_BEGIN_ALLOW_THREADS - direntp = readdir(iterator->dirp); - Py_END_ALLOW_THREADS - - if (!direntp) { - /* Error or no more files */ - if (errno != 0) - path_error(&iterator->path); - break; - } - + while ((ScandirIterator_nextdirentry(iterator, &direntp))) { /* Skip over . and .. */ - name_len = NAMLEN(direntp); - is_dot = direntp->d_name[0] == '.' && - (name_len == 1 || (direntp->d_name[1] == '.' && name_len == 2)); + name_len = NAMLEN(&direntp); + is_dot = direntp.d_name[0] == '.' && + (name_len == 1 || (direntp.d_name[1] == '.' && name_len == 2)); if (!is_dot) { PyObject *module = PyType_GetModule(Py_TYPE(iterator)); entry = DirEntry_from_posix_info(module, - &iterator->path, direntp->d_name, - name_len, direntp->d_ino + &iterator->path, direntp.d_name, + name_len, direntp.d_ino #ifdef HAVE_DIRENT_D_TYPE - , direntp->d_type + , direntp.d_type #endif ); if (!entry) break; - _PyRecursiveMutex_Unlock(&iterator->mutex); return entry; } @@ -16343,7 +16364,6 @@ ScandirIterator_iternext(PyObject *op) } /* Already closed, error, or no more files */ - _PyRecursiveMutex_Unlock(&iterator->mutex); ScandirIterator_closedir(iterator); return NULL; } @@ -16476,7 +16496,7 @@ os_scandir_impl(PyObject *module, path_t *path) if (!iterator) return NULL; - iterator->mutex = (_PyRecursiveMutex){0}; + iterator->mutex = (PyMutex){0}; #ifdef MS_WINDOWS iterator->handle = INVALID_HANDLE_VALUE; #else From d374264a3cf7a146777e3e20359d6804eb0f2bac Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Tue, 25 Mar 2025 14:13:14 +1300 Subject: [PATCH 6/9] Refactor to use atomics & compare/exchange instead of holding a lock to avoid possibility of deadlock upon re-entrance. --- Modules/posixmodule.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 03bbf01b66af2c..f732c5a119c2d8 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -15582,7 +15582,6 @@ typedef struct { ino_t d_ino; int dir_fd; #endif - _PyRecursiveMutex mutex; } DirEntry; #define DirEntry_CAST(op) ((DirEntry *)(op)) @@ -15592,12 +15591,10 @@ DirEntry_dealloc(PyObject *op) { DirEntry *entry = DirEntry_CAST(op); PyTypeObject *tp = Py_TYPE(entry); - _PyRecursiveMutex_Lock(&entry->mutex); Py_XDECREF(entry->name); Py_XDECREF(entry->path); Py_XDECREF(entry->stat); Py_XDECREF(entry->lstat); - _PyRecursiveMutex_Unlock(&entry->mutex); freefunc free_func = PyType_GetSlot(tp, Py_tp_free); free_func(entry); Py_DECREF(tp); @@ -15715,16 +15712,19 @@ DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) static PyObject * DirEntry_get_lstat(PyTypeObject *defining_class, DirEntry *self) { - /* Must be called with self->mutex held */ - if (!self->lstat) { + if (!FT_ATOMIC_LOAD_PTR(self->lstat)) { PyObject *module = PyType_GetModule(defining_class); + PyObject *lstat; + PyObject *null_ptr = NULL; #ifdef MS_WINDOWS - self->lstat = _pystat_fromstructstat(module, &self->win32_lstat); + lstat = _pystat_fromstructstat(module, &self->win32_lstat); #else /* POSIX */ - self->lstat = DirEntry_fetch_stat(module, self, 0); + lstat = DirEntry_fetch_stat(module, self, 0); #endif + if (!_Py_atomic_compare_exchange_ptr(&self->lstat, &null_ptr, lstat)) + Py_XDECREF(lstat); } - return Py_XNewRef(self->lstat); + return Py_XNewRef(FT_ATOMIC_LOAD_PTR(self->lstat)); } /*[clinic input] @@ -15742,31 +15742,28 @@ os_DirEntry_stat_impl(DirEntry *self, PyTypeObject *defining_class, int follow_symlinks) /*[clinic end generated code: output=23f803e19c3e780e input=e816273c4e67ee98]*/ { - if (!follow_symlinks) { - _PyRecursiveMutex_Lock(&self->mutex); - PyObject *stat = DirEntry_get_lstat(defining_class, self); - _PyRecursiveMutex_Unlock(&self->mutex); - return stat; - } + if (!follow_symlinks) + return DirEntry_get_lstat(defining_class, self); - _PyRecursiveMutex_Lock(&self->mutex); - if (!self->stat) { + if (!FT_ATOMIC_LOAD_PTR(self->stat)) { + PyObject *stat; + PyObject *null_ptr = NULL; int result = os_DirEntry_is_symlink_impl(self, defining_class); if (result == -1) { - _PyRecursiveMutex_Unlock(&self->mutex); return NULL; } if (result) { PyObject *module = PyType_GetModule(defining_class); - self->stat = DirEntry_fetch_stat(module, self, 1); + stat = DirEntry_fetch_stat(module, self, 1); } else { - self->stat = DirEntry_get_lstat(defining_class, self); + stat = DirEntry_get_lstat(defining_class, self); } + if (!_Py_atomic_compare_exchange_ptr(&self->stat, &null_ptr, stat)) + Py_XDECREF(stat); } - _PyRecursiveMutex_Unlock(&self->mutex); - return Py_XNewRef(self->stat); + return Py_XNewRef(FT_ATOMIC_LOAD_PTR(self->stat)); } /* Set exception and return -1 on error, 0 for False, 1 for True */ @@ -16039,7 +16036,6 @@ DirEntry_from_find_data(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW) entry->stat = NULL; entry->lstat = NULL; entry->got_file_index = 0; - entry->mutex = (_PyRecursiveMutex){0}; entry->name = PyUnicode_FromWideChar(dataW->cFileName, -1); if (!entry->name) @@ -16132,7 +16128,6 @@ DirEntry_from_posix_info(PyObject *module, path_t *path, const char *name, entry->path = NULL; entry->stat = NULL; entry->lstat = NULL; - entry->mutex = (_PyRecursiveMutex){0}; if (path->fd != -1) { entry->dir_fd = path->fd; From 6b3b46d4bed99ca2a1aba78ff36f485b62d7ac23 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 26 Mar 2025 12:50:03 +1300 Subject: [PATCH 7/9] Refactor to share code and eliminate duplication between Windows & POSIX platforms --- Modules/posixmodule.c | 104 +++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 62 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index f732c5a119c2d8..2c687b63337bec 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -4430,6 +4430,25 @@ os_link_impl(PyObject *module, path_t *src, path_t *dst, int src_dir_fd, } #endif +#ifdef MS_WINDOWS + +static int +_is_dot_filename(WIN32_FIND_DATAW *entry) +{ + return wcscmp(entry->cFileName, L".") == 0 || wcscmp(entry->cFileName, L"..") == 0; +} + +#else + +static int +_is_dot_filename(struct dirent *entry) +{ + Py_ssize_t name_len = NAMLEN(entry); + return entry->d_name[0] == '.' && + (name_len == 1 || (entry->d_name[1] == '.' && name_len == 2)); +} + +#endif #if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) static PyObject * @@ -4484,8 +4503,7 @@ _listdir_windows_no_opendir(path_t *path, PyObject *list) } do { /* Skip over . and .. */ - if (wcscmp(wFileData.cFileName, L".") != 0 && - wcscmp(wFileData.cFileName, L"..") != 0) { + if (!_is_dot_filename(&wFileData)) { v = PyUnicode_FromWideChar(wFileData.cFileName, wcslen(wFileData.cFileName)); if (return_bytes && v) { @@ -16020,7 +16038,7 @@ join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename) } static PyObject * -DirEntry_from_find_data(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW) +DirEntry_from_os(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW) { DirEntry *entry; BY_HANDLE_FILE_INFORMATION file_info; @@ -16110,15 +16128,12 @@ join_path_filename(const char *path_narrow, const char* filename, Py_ssize_t fil } static PyObject * -DirEntry_from_posix_info(PyObject *module, path_t *path, const char *name, - Py_ssize_t name_len, ino_t d_ino -#ifdef HAVE_DIRENT_D_TYPE - , unsigned char d_type -#endif - ) +DirEntry_from_os(PyObject *module, path_t *path, struct dirent *direntp) { DirEntry *entry; char *joined_path; + const char *name = direntp->d_name; + Py_ssize_t name_len = NAMLEN(direntp); PyObject *DirEntryType = get_posix_state(module)->DirEntryType; entry = PyObject_New(DirEntry, (PyTypeObject *)DirEntryType); @@ -16161,9 +16176,9 @@ DirEntry_from_posix_info(PyObject *module, path_t *path, const char *name, goto error; #ifdef HAVE_DIRENT_D_TYPE - entry->d_type = d_type; + entry->d_type = direntp->d_type; #endif - entry->d_ino = d_ino; + entry->d_ino = direntp->d_ino; return (PyObject *)entry; @@ -16245,33 +16260,6 @@ ScandirIterator_nextdirentry(ScandirIterator *iterator, WIN32_FIND_DATAW *file_d return has_result; } -static PyObject * -ScandirIterator_iternext(PyObject *op) -{ - ScandirIterator *iterator = ScandirIterator_CAST(op); - WIN32_FIND_DATAW file_data; - PyObject *entry; - - while (ScandirIterator_nextdirentry(iterator, &file_data)) { - /* Skip over . and .. */ - if (wcscmp(file_data.cFileName, L".") != 0 && - wcscmp(file_data.cFileName, L"..") != 0) - { - PyObject *module = PyType_GetModule(Py_TYPE(iterator)); - entry = DirEntry_from_find_data(module, &iterator->path, &file_data); - if (!entry) - break; - return entry; - } - - /* Loop till we get a non-dot directory or finish iterating */ - } - - /* Already closed, error, or no more files */ - ScandirIterator_closedir(iterator); - return NULL; -} - #else /* POSIX */ static int @@ -16327,35 +16315,29 @@ ScandirIterator_nextdirentry(ScandirIterator *iterator, struct dirent *direntp) return result != NULL; } +#endif + static PyObject * ScandirIterator_iternext(PyObject *op) { +#ifdef MS_WINDOWS + WIN32_FIND_DATAW dirent; +#else + struct dirent dirent; +#endif ScandirIterator *iterator = ScandirIterator_CAST(op); - struct dirent direntp; - Py_ssize_t name_len; - int is_dot; - PyObject *entry; - while ((ScandirIterator_nextdirentry(iterator, &direntp))) { + while ((ScandirIterator_nextdirentry(iterator, &dirent))) { + /* Skip over . and .. */ - name_len = NAMLEN(&direntp); - is_dot = direntp.d_name[0] == '.' && - (name_len == 1 || (direntp.d_name[1] == '.' && name_len == 2)); - if (!is_dot) { - PyObject *module = PyType_GetModule(Py_TYPE(iterator)); - entry = DirEntry_from_posix_info(module, - &iterator->path, direntp.d_name, - name_len, direntp.d_ino -#ifdef HAVE_DIRENT_D_TYPE - , direntp.d_type -#endif - ); - if (!entry) - break; - return entry; - } + if (_is_dot_filename(&dirent)) + continue; - /* Loop till we get a non-dot directory or finish iterating */ + PyObject *module = PyType_GetModule(Py_TYPE(iterator)); + PyObject *entry = DirEntry_from_os(module, &iterator->path, &dirent); + if (!entry) + break; + return entry; } /* Already closed, error, or no more files */ @@ -16363,8 +16345,6 @@ ScandirIterator_iternext(PyObject *op) return NULL; } -#endif - static PyObject * ScandirIterator_close(PyObject *op, PyObject *Py_UNUSED(dummy)) { From 072a07cec3e0400d69e1ab7c4725ab12c01cb2dc Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Thu, 27 Mar 2025 10:04:21 +1300 Subject: [PATCH 8/9] struct dirent is variable length in the original POSIX specification (the final filename field being an unsized array) and in actual practice on the WASI platform. Thus just copying one into a stack-allocated structure is not good enough. The obvious way to address this is to allocate memory on the heap and return that. However, it seems wasteful to dynamically allocate a temporary chunk of data just to copy some data in and then out again. Instead, if the name field is potentially not long enough, allocate some extra space *on the stack* following the structure such that it must be sufficient to hold the the full record length (and copy all of that, not just the potentially smaller fixed structure size). --- Modules/posixmodule.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 2c687b63337bec..467c0d7365f774 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -204,6 +204,9 @@ # include // __msan_unpoison() #endif +#ifdef HAVE_ALLOCA_H +#include +#endif // --- More complex system includes ----------------------------------------- @@ -16303,9 +16306,19 @@ ScandirIterator_nextdirentry(ScandirIterator *iterator, struct dirent *direntp) Py_END_ALLOW_THREADS has_error = !result && errno != 0; } + /* We need to make a copy of the result before releasing the lock */ - if (result) - memcpy(direntp, result, sizeof(struct dirent)); + if (result) { + + /* A note on calculating the size: the dirent structure may be variable + * length with the d_name field being a flexible array member, so we + * cannot just use sizeof(struct dirent). On Linux there is a d_reclen + * field, however that is not guaranteed to exist (and doesn't on WASI, + * which happens to be the platform where this is an issue). Hence we + * calculate the relevant record size thus. */ + size_t reclen = offsetof(struct dirent, d_name) + strlen(result->d_name) + 1; + memcpy(direntp, result, reclen); + } PyMutex_Unlock(&iterator->mutex); /* Error or no more files */ @@ -16320,12 +16333,22 @@ ScandirIterator_nextdirentry(ScandirIterator *iterator, struct dirent *direntp) static PyObject * ScandirIterator_iternext(PyObject *op) { + ScandirIterator *iterator = ScandirIterator_CAST(op); #ifdef MS_WINDOWS WIN32_FIND_DATAW dirent; #else + /* + * This may be a variable length structure, with the final "d_name" field + * possibly being a flexible array member. In that case allocate enough + * extra space on the stack, immediately following the structure, + * sufficient to handle the longest possible filename. */ struct dirent dirent; + if (sizeof(dirent) - offsetof(struct dirent, d_name) < (NAME_MAX + 1) + && !alloca(NAME_MAX + 1 - (sizeof(dirent) - offsetof(struct dirent, d_name)))) { + PyErr_NoMemory(); + goto error; + } #endif - ScandirIterator *iterator = ScandirIterator_CAST(op); while ((ScandirIterator_nextdirentry(iterator, &dirent))) { @@ -16341,6 +16364,7 @@ ScandirIterator_iternext(PyObject *op) } /* Already closed, error, or no more files */ +error: ScandirIterator_closedir(iterator); return NULL; } From e37ec68b6de1f7208624efb3462209ba5cf27003 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Sat, 31 May 2025 15:40:07 +1200 Subject: [PATCH 9/9] Add regression tests --- Lib/test/test_os.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 88b5b0e6e358bb..264fa8e5d6d876 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -26,6 +26,7 @@ import sysconfig import tempfile import textwrap +import threading import time import types import unittest @@ -37,6 +38,7 @@ from test.support import socket_helper from test.support import infinite_recursion from test.support import warnings_helper +from test.support import threading_helper from platform import win32_is_iot try: @@ -5406,6 +5408,49 @@ def test_resource_warning(self): with self.check_no_resource_warning(): del iterator + @support.requires_resource('cpu') + @threading_helper.requires_working_threading() + def test_races_scandir_iterations(self): + N = 4 + ITERATIONS = 10 if support.check_sanitizer(address=True) else 1000 + + self.create_file("file.txt") + self.create_file("file2.txt") + + def scan(iter, barrier): + barrier.wait() + for entry in iter: + pass + + for _ in range(ITERATIONS): + barrier = threading.Barrier(N) + iter = os.scandir(self.path) + + threads = [threading.Thread(target=scan, args=(iter, barrier)) + for _ in range(N)] + with threading_helper.start_threads(threads): + pass + + @threading_helper.requires_working_threading() + @unittest.skipUnless(support.check_sanitizer(address=True), "requires ASAN") + def test_races_scandir_entry(self): + N = 2 + ITERATIONS = 10 + + self.create_file("file.txt") + self.create_file("file2.txt") + + def stat(entry, barrier): + barrier.wait() + entry.stat() + + for _ in range(ITERATIONS): + barrier = threading.Barrier(N) + for entry in os.scandir(self.path): + threads = [threading.Thread(target=stat, args=(entry, barrier)) + for _ in range(N)] + with threading_helper.start_threads(threads): + pass class TestPEP519(unittest.TestCase):