diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 88b5b0e6e358bb..264fa8e5d6d876 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -26,6 +26,7 @@ import sysconfig import tempfile import textwrap +import threading import time import types import unittest @@ -37,6 +38,7 @@ from test.support import socket_helper from test.support import infinite_recursion from test.support import warnings_helper +from test.support import threading_helper from platform import win32_is_iot try: @@ -5406,6 +5408,49 @@ def test_resource_warning(self): with self.check_no_resource_warning(): del iterator + @support.requires_resource('cpu') + @threading_helper.requires_working_threading() + def test_races_scandir_iterations(self): + N = 4 + ITERATIONS = 10 if support.check_sanitizer(address=True) else 1000 + + self.create_file("file.txt") + self.create_file("file2.txt") + + def scan(iter, barrier): + barrier.wait() + for entry in iter: + pass + + for _ in range(ITERATIONS): + barrier = threading.Barrier(N) + iter = os.scandir(self.path) + + threads = [threading.Thread(target=scan, args=(iter, barrier)) + for _ in range(N)] + with threading_helper.start_threads(threads): + pass + + @threading_helper.requires_working_threading() + @unittest.skipUnless(support.check_sanitizer(address=True), "requires ASAN") + def test_races_scandir_entry(self): + N = 2 + ITERATIONS = 10 + + self.create_file("file.txt") + self.create_file("file2.txt") + + def stat(entry, barrier): + barrier.wait() + entry.stat() + + for _ in range(ITERATIONS): + barrier = threading.Barrier(N) + for entry in os.scandir(self.path): + threads = [threading.Thread(target=stat, args=(entry, barrier)) + for _ in range(N)] + with threading_helper.start_threads(threads): + pass class TestPEP519(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2025-03-24-15-16-20.gh-issue-73069.xKqJPz.rst b/Misc/NEWS.d/next/Library/2025-03-24-15-16-20.gh-issue-73069.xKqJPz.rst new file mode 100644 index 00000000000000..a4ed0ded1d2199 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-24-15-16-20.gh-issue-73069.xKqJPz.rst @@ -0,0 +1 @@ +Fixed some race conditions in os.scandir and associated direntry. diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 588894adeac811..e2a90d81a6c840 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -204,6 +204,9 @@ # include // __msan_unpoison() #endif +#ifdef HAVE_ALLOCA_H +#include +#endif // --- More complex system includes ----------------------------------------- @@ -4446,6 +4449,25 @@ os_link_impl(PyObject *module, path_t *src, path_t *dst, int src_dir_fd, } #endif +#ifdef MS_WINDOWS + +static int +_is_dot_filename(WIN32_FIND_DATAW *entry) +{ + return wcscmp(entry->cFileName, L".") == 0 || wcscmp(entry->cFileName, L"..") == 0; +} + +#else + +static int +_is_dot_filename(struct dirent *entry) +{ + Py_ssize_t name_len = NAMLEN(entry); + return entry->d_name[0] == '.' && + (name_len == 1 || (entry->d_name[1] == '.' && name_len == 2)); +} + +#endif #if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) static PyObject * @@ -4500,8 +4522,7 @@ _listdir_windows_no_opendir(path_t *path, PyObject *list) } do { /* Skip over . and .. */ - if (wcscmp(wFileData.cFileName, L".") != 0 && - wcscmp(wFileData.cFileName, L"..") != 0) { + if (!_is_dot_filename(&wFileData)) { v = PyUnicode_FromWideChar(wFileData.cFileName, wcslen(wFileData.cFileName)); if (return_bytes && v) { @@ -15730,15 +15751,19 @@ DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) static PyObject * DirEntry_get_lstat(PyTypeObject *defining_class, DirEntry *self) { - if (!self->lstat) { + if (!FT_ATOMIC_LOAD_PTR(self->lstat)) { PyObject *module = PyType_GetModule(defining_class); + PyObject *lstat; + PyObject *null_ptr = NULL; #ifdef MS_WINDOWS - self->lstat = _pystat_fromstructstat(module, &self->win32_lstat); + lstat = _pystat_fromstructstat(module, &self->win32_lstat); #else /* POSIX */ - self->lstat = DirEntry_fetch_stat(module, self, 0); + lstat = DirEntry_fetch_stat(module, self, 0); #endif + if (!_Py_atomic_compare_exchange_ptr(&self->lstat, &null_ptr, lstat)) + Py_XDECREF(lstat); } - return Py_XNewRef(self->lstat); + return Py_XNewRef(FT_ATOMIC_LOAD_PTR(self->lstat)); } /*[clinic input] @@ -15756,25 +15781,28 @@ os_DirEntry_stat_impl(DirEntry *self, PyTypeObject *defining_class, int follow_symlinks) /*[clinic end generated code: output=23f803e19c3e780e input=e816273c4e67ee98]*/ { - if (!follow_symlinks) { + if (!follow_symlinks) return DirEntry_get_lstat(defining_class, self); - } - if (!self->stat) { + if (!FT_ATOMIC_LOAD_PTR(self->stat)) { + PyObject *stat; + PyObject *null_ptr = NULL; int result = os_DirEntry_is_symlink_impl(self, defining_class); if (result == -1) { return NULL; } if (result) { PyObject *module = PyType_GetModule(defining_class); - self->stat = DirEntry_fetch_stat(module, self, 1); + stat = DirEntry_fetch_stat(module, self, 1); } else { - self->stat = DirEntry_get_lstat(defining_class, self); + stat = DirEntry_get_lstat(defining_class, self); } + if (!_Py_atomic_compare_exchange_ptr(&self->stat, &null_ptr, stat)) + Py_XDECREF(stat); } - return Py_XNewRef(self->stat); + return Py_XNewRef(FT_ATOMIC_LOAD_PTR(self->stat)); } /* Set exception and return -1 on error, 0 for False, 1 for True */ @@ -16031,7 +16059,7 @@ join_path_filenameW(const wchar_t *path_wide, const wchar_t *filename) } static PyObject * -DirEntry_from_find_data(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW) +DirEntry_from_os(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW) { DirEntry *entry; BY_HANDLE_FILE_INFORMATION file_info; @@ -16121,15 +16149,12 @@ join_path_filename(const char *path_narrow, const char* filename, Py_ssize_t fil } static PyObject * -DirEntry_from_posix_info(PyObject *module, path_t *path, const char *name, - Py_ssize_t name_len, ino_t d_ino -#ifdef HAVE_DIRENT_D_TYPE - , unsigned char d_type -#endif - ) +DirEntry_from_os(PyObject *module, path_t *path, struct dirent *direntp) { DirEntry *entry; char *joined_path; + const char *name = direntp->d_name; + Py_ssize_t name_len = NAMLEN(direntp); PyObject *DirEntryType = get_posix_state(module)->DirEntryType; entry = PyObject_New(DirEntry, (PyTypeObject *)DirEntryType); @@ -16172,9 +16197,9 @@ DirEntry_from_posix_info(PyObject *module, path_t *path, const char *name, goto error; #ifdef HAVE_DIRENT_D_TYPE - entry->d_type = d_type; + entry->d_type = direntp->d_type; #endif - entry->d_ino = d_ino; + entry->d_ino = direntp->d_ino; return (PyObject *)entry; @@ -16199,6 +16224,7 @@ typedef struct { #ifdef HAVE_FDOPENDIR int fd; #endif + PyMutex mutex; } ScandirIterator; #define ScandirIterator_CAST(op) ((ScandirIterator *)(op)) @@ -16208,66 +16234,51 @@ typedef struct { static int ScandirIterator_is_closed(ScandirIterator *iterator) { - return iterator->handle == INVALID_HANDLE_VALUE; + return _Py_atomic_load_ptr_relaxed(&iterator->handle) == INVALID_HANDLE_VALUE; } static void ScandirIterator_closedir(ScandirIterator *iterator) { + PyMutex_Lock(&iterator->mutex); HANDLE handle = iterator->handle; + iterator->handle = INVALID_HANDLE_VALUE; + PyMutex_Unlock(&iterator->mutex); - if (handle == INVALID_HANDLE_VALUE) + if (handle == INVALID_HANDLE_VALUE) { return; + } - iterator->handle = INVALID_HANDLE_VALUE; Py_BEGIN_ALLOW_THREADS FindClose(handle); Py_END_ALLOW_THREADS } -static PyObject * -ScandirIterator_iternext(PyObject *op) -{ - ScandirIterator *iterator = ScandirIterator_CAST(op); - WIN32_FIND_DATAW *file_data = &iterator->file_data; - BOOL success; - PyObject *entry; - - /* Happens if the iterator is iterated twice, or closed explicitly */ - if (iterator->handle == INVALID_HANDLE_VALUE) - return NULL; - - while (1) { - if (!iterator->first_time) { +static BOOL +ScandirIterator_nextdirentry(ScandirIterator *iterator, WIN32_FIND_DATAW *file_data) +{ + BOOL has_error = FALSE; + BOOL has_result = FALSE; + PyMutex_Lock(&iterator->mutex); + if (iterator->handle != INVALID_HANDLE_VALUE) { + if (iterator->first_time) { + has_result = TRUE; + memcpy(file_data, &iterator->file_data, sizeof(WIN32_FIND_DATAW)); + iterator->first_time = 0; + } else { Py_BEGIN_ALLOW_THREADS - success = FindNextFileW(iterator->handle, file_data); + has_result = FindNextFileW(iterator->handle, file_data); Py_END_ALLOW_THREADS - if (!success) { - /* Error or no more files */ - if (GetLastError() != ERROR_NO_MORE_FILES) - path_error(&iterator->path); - break; - } - } - iterator->first_time = 0; - - /* Skip over . and .. */ - if (wcscmp(file_data->cFileName, L".") != 0 && - wcscmp(file_data->cFileName, L"..") != 0) - { - PyObject *module = PyType_GetModule(Py_TYPE(iterator)); - entry = DirEntry_from_find_data(module, &iterator->path, file_data); - if (!entry) - break; - return entry; + has_error = !has_result && GetLastError() != ERROR_NO_MORE_FILES; } - - /* Loop till we get a non-dot directory or finish iterating */ } + PyMutex_Unlock(&iterator->mutex); /* Error or no more files */ - ScandirIterator_closedir(iterator); - return NULL; + if (has_error) + path_error(&iterator->path); + + return has_result; } #else /* POSIX */ @@ -16275,18 +16286,21 @@ ScandirIterator_iternext(PyObject *op) static int ScandirIterator_is_closed(ScandirIterator *iterator) { - return !iterator->dirp; + return !_Py_atomic_load_ptr_relaxed(&iterator->dirp); } static void ScandirIterator_closedir(ScandirIterator *iterator) { + PyMutex_Lock(&iterator->mutex); DIR *dirp = iterator->dirp; + iterator->dirp = NULL; + PyMutex_Unlock(&iterator->mutex); - if (!dirp) + if (!dirp) { return; + } - iterator->dirp = NULL; Py_BEGIN_ALLOW_THREADS #ifdef HAVE_FDOPENDIR if (iterator->path.fd != -1) @@ -16297,60 +16311,82 @@ ScandirIterator_closedir(ScandirIterator *iterator) return; } -static PyObject * -ScandirIterator_iternext(PyObject *op) +static int +ScandirIterator_nextdirentry(ScandirIterator *iterator, struct dirent *direntp) { - ScandirIterator *iterator = ScandirIterator_CAST(op); - struct dirent *direntp; - Py_ssize_t name_len; - int is_dot; - PyObject *entry; - - /* Happens if the iterator is iterated twice, or closed explicitly */ - if (!iterator->dirp) - return NULL; - - while (1) { + int has_error = 0; + struct dirent *result = NULL; + PyMutex_Lock(&iterator->mutex); + if (iterator->dirp) { errno = 0; Py_BEGIN_ALLOW_THREADS - direntp = readdir(iterator->dirp); + result = readdir(iterator->dirp); Py_END_ALLOW_THREADS + has_error = !result && errno != 0; + } - if (!direntp) { - /* Error or no more files */ - if (errno != 0) - path_error(&iterator->path); - break; - } + /* We need to make a copy of the result before releasing the lock */ + if (result) { + + /* A note on calculating the size: the dirent structure may be variable + * length with the d_name field being a flexible array member, so we + * cannot just use sizeof(struct dirent). On Linux there is a d_reclen + * field, however that is not guaranteed to exist (and doesn't on WASI, + * which happens to be the platform where this is an issue). Hence we + * calculate the relevant record size thus. */ + size_t reclen = offsetof(struct dirent, d_name) + strlen(result->d_name) + 1; + memcpy(direntp, result, reclen); + } + PyMutex_Unlock(&iterator->mutex); + + /* Error or no more files */ + if (has_error) + path_error(&iterator->path); + + return result != NULL; +} - /* Skip over . and .. */ - name_len = NAMLEN(direntp); - is_dot = direntp->d_name[0] == '.' && - (name_len == 1 || (direntp->d_name[1] == '.' && name_len == 2)); - if (!is_dot) { - PyObject *module = PyType_GetModule(Py_TYPE(iterator)); - entry = DirEntry_from_posix_info(module, - &iterator->path, direntp->d_name, - name_len, direntp->d_ino -#ifdef HAVE_DIRENT_D_TYPE - , direntp->d_type #endif - ); - if (!entry) - break; - return entry; - } - /* Loop till we get a non-dot directory or finish iterating */ +static PyObject * +ScandirIterator_iternext(PyObject *op) +{ + ScandirIterator *iterator = ScandirIterator_CAST(op); +#ifdef MS_WINDOWS + WIN32_FIND_DATAW dirent; +#else + /* + * This may be a variable length structure, with the final "d_name" field + * possibly being a flexible array member. In that case allocate enough + * extra space on the stack, immediately following the structure, + * sufficient to handle the longest possible filename. */ + struct dirent dirent; + if (sizeof(dirent) - offsetof(struct dirent, d_name) < (NAME_MAX + 1) + && !alloca(NAME_MAX + 1 - (sizeof(dirent) - offsetof(struct dirent, d_name)))) { + PyErr_NoMemory(); + goto error; + } +#endif + + while ((ScandirIterator_nextdirentry(iterator, &dirent))) { + + /* Skip over . and .. */ + if (_is_dot_filename(&dirent)) + continue; + + PyObject *module = PyType_GetModule(Py_TYPE(iterator)); + PyObject *entry = DirEntry_from_os(module, &iterator->path, &dirent); + if (!entry) + break; + return entry; } - /* Error or no more files */ + /* Already closed, error, or no more files */ +error: ScandirIterator_closedir(iterator); return NULL; } -#endif - static PyObject * ScandirIterator_close(PyObject *op, PyObject *Py_UNUSED(dummy)) { @@ -16477,6 +16513,7 @@ os_scandir_impl(PyObject *module, path_t *path) if (!iterator) return NULL; + iterator->mutex = (PyMutex){0}; #ifdef MS_WINDOWS iterator->handle = INVALID_HANDLE_VALUE; #else