From 2f299af367570cce715374046de6f7cf6b19bf32 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 12 Mar 2024 17:49:10 +0000 Subject: [PATCH 01/19] Disable GIL by default --- Lib/test/test_cmd_line.py | 2 +- Lib/test/test_embed.py | 2 +- Python/initconfig.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index fb832aed3152ff..50439086d09041 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -883,7 +883,7 @@ def test_pythondevmode_env(self): def test_python_gil(self): cases = [ # (env, opt, expected, msg) - (None, None, 'None', "no options set"), + (None, None, '0', "no options set"), ('0', None, '0', "PYTHON_GIL=0"), ('1', None, '1', "PYTHON_GIL=1"), ('1', '0', '0', "-X gil=0 overrides PYTHON_GIL=1"), diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index ab1d579ed12755..b9f83d69a6d662 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -524,7 +524,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): if support.Py_DEBUG: CONFIG_COMPAT['run_presite'] = None if support.Py_GIL_DISABLED: - CONFIG_COMPAT['enable_gil'] = -1 + CONFIG_COMPAT['enable_gil'] = 0 if MS_WINDOWS: CONFIG_COMPAT.update({ 'legacy_windows_stdio': 0, diff --git a/Python/initconfig.c b/Python/initconfig.c index 215d6a1d4e0dba..958732d068649a 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -844,7 +844,7 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->code_debug_ranges = 1; config->cpu_count = -1; #ifdef Py_GIL_DISABLED - config->enable_gil = _PyConfig_GIL_DEFAULT; + config->enable_gil = _PyConfig_GIL_DISABLE; #endif } From 3d270d2e86711440ae056ea600bbbd7332443717 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 8 Apr 2024 22:17:24 +0000 Subject: [PATCH 02/19] gh-112075: Lock when inserting into shared keys --- Objects/dictobject.c | 226 ++++++++++++++++++++----------------------- 1 file changed, 103 insertions(+), 123 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index e7993e4b051433..341b4f7f6620c2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1606,31 +1606,6 @@ insertion_resize(PyInterpreterState *interp, PyDictObject *mp, int unicode) return dictresize(interp, mp, calculate_log2_keysize(GROWTH_RATE(mp)), unicode); } -static Py_ssize_t -insert_into_splitdictkeys(PyDictKeysObject *keys, PyObject *name, Py_hash_t hash) -{ - assert(PyUnicode_CheckExact(name)); - ASSERT_KEYS_LOCKED(keys); - - Py_ssize_t ix = unicodekeys_lookup_unicode(keys, name, hash); - if (ix == DKIX_EMPTY) { - if (keys->dk_usable <= 0) { - return DKIX_EMPTY; - } - /* Insert into new slot. 
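Only the key is added to the shared keys here; the caller is responsible for storing the value at the returned index.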
*/ - keys->dk_version = 0; - Py_ssize_t hashpos = find_empty_slot(keys, hash); - ix = keys->dk_nentries; - PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(keys)[ix]; - dictkeys_set_index(keys, hashpos, ix); - assert(ep->me_key == NULL); - ep->me_key = Py_NewRef(name); - split_keys_entry_added(keys); - } - assert (ix < SHARED_KEYS_MAX_SIZE); - return ix; -} - static inline int insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, Py_hash_t hash, PyObject *key, PyObject *value) @@ -1665,38 +1640,55 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, } static int -insert_split_dict(PyInterpreterState *interp, PyDictObject *mp, - Py_hash_t hash, PyObject *key, PyObject *value) +insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) { - PyDictKeysObject *keys = mp->ma_keys; - LOCK_KEYS(keys); - if (keys->dk_usable <= 0) { - /* Need to resize. */ - UNLOCK_KEYS(keys); - int ins = insertion_resize(interp, mp, 1); - if (ins < 0) { - return -1; - } - assert(!_PyDict_HasSplitTable(mp)); - return insert_combined_dict(interp, mp, hash, key, value); - } - - Py_ssize_t hashpos = find_empty_slot(keys, hash); - dictkeys_set_index(keys, hashpos, keys->dk_nentries); - - PyDictUnicodeEntry *ep; - ep = &DK_UNICODE_ENTRIES(keys)[keys->dk_nentries]; - STORE_SHARED_KEY(ep->me_key, key); + assert(PyUnicode_CheckExact(key)); + Py_ssize_t ix; - Py_ssize_t index = keys->dk_nentries; - _PyDictValues_AddToInsertionOrder(mp->ma_values, index); - assert (mp->ma_values->values[index] == NULL); - STORE_SPLIT_VALUE(mp, index, value); +#ifdef Py_GIL_DISABLED + ix = unicodekeys_lookup_unicode_threadsafe(keys, key, hash); + if (ix >= 0) { + return ix; + } +#endif - split_keys_entry_added(keys); - assert(keys->dk_usable >= 0); + LOCK_KEYS(keys); + ix = unicodekeys_lookup_unicode(keys, key, hash); + if (ix == DKIX_EMPTY && keys->dk_usable > 0) { + // Insert into new slot + keys->dk_version = 0; + Py_ssize_t hashpos = find_empty_slot(keys, hash); + ix = keys->dk_nentries; + dictkeys_set_index(keys, hashpos, ix); + PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(keys)[ix]; + STORE_SHARED_KEY(ep->me_key, Py_NewRef(key)); + split_keys_entry_added(keys); + } + assert (ix < SHARED_KEYS_MAX_SIZE); UNLOCK_KEYS(keys); - return 0; + return ix; +} + +static void +insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key, PyObject *value, Py_ssize_t ix) +{ + assert(PyUnicode_CheckExact(key)); + MAINTAIN_TRACKING(mp, key, value); + PyObject *old_value = mp->ma_values->values[ix]; + if (old_value == NULL) { + uint64_t new_version = _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); + STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value)); + _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); + mp->ma_used++; + mp->ma_version_tag = new_version; + } + else { + uint64_t new_version = _PyDict_NotifyEvent(interp, PyDict_EVENT_MODIFIED, mp, key, value); + STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value)); + mp->ma_version_tag = new_version; + Py_DECREF(old_value); + } + ASSERT_CONSISTENT(mp); } /* @@ -1719,6 +1711,21 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL); } + if (mp->ma_keys->dk_kind == DICT_KEYS_SPLIT) { + Py_ssize_t ix = insert_split_key(mp->ma_keys, key, hash); + if (ix != DKIX_EMPTY) { + insert_split_value(interp, mp, key, value, ix); + Py_DECREF(key); + Py_DECREF(value); + return 0; + } + + /* No space in shared keys. Resize and continue below. 
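The resize converts the split table into a combined one, so the generic lookup/insert path below performs the actual store.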
*/ + if (insertion_resize(interp, mp, 1) < 0) { + goto Fail; + } + } + Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value); if (ix == DKIX_ERROR) goto Fail; @@ -1731,17 +1738,9 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, /* Insert into new slot. */ mp->ma_keys->dk_version = 0; assert(old_value == NULL); - - if (!_PyDict_HasSplitTable(mp)) { - if (insert_combined_dict(interp, mp, hash, key, value) < 0) { - goto Fail; - } - } - else { - if (insert_split_dict(interp, mp, hash, key, value) < 0) - goto Fail; + if (insert_combined_dict(interp, mp, hash, key, value) < 0) { + goto Fail; } - mp->ma_used++; mp->ma_version_tag = new_version; ASSERT_CONSISTENT(mp); @@ -1751,21 +1750,12 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, if (old_value != value) { uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_MODIFIED, mp, key, value); - if (_PyDict_HasSplitTable(mp)) { - mp->ma_values->values[ix] = value; - if (old_value == NULL) { - _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); - mp->ma_used++; - } + assert(old_value != NULL); + if (DK_IS_UNICODE(mp->ma_keys)) { + DK_UNICODE_ENTRIES(mp->ma_keys)[ix].me_value = value; } else { - assert(old_value != NULL); - if (DK_IS_UNICODE(mp->ma_keys)) { - DK_UNICODE_ENTRIES(mp->ma_keys)[ix].me_value = value; - } - else { - DK_ENTRIES(mp->ma_keys)[ix].me_value = value; - } + DK_ENTRIES(mp->ma_keys)[ix].me_value = value; } mp->ma_version_tag = new_version; } @@ -4173,6 +4163,29 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } } + if (mp->ma_keys->dk_kind == DICT_KEYS_SPLIT) { + Py_ssize_t ix = insert_split_key(mp->ma_keys, key, hash); + if (ix != DKIX_EMPTY) { + PyObject *value = mp->ma_values->values[ix]; + int already_present = value != NULL; + if (!already_present) { + insert_split_value(interp, mp, key, default_value, ix); + value = default_value; + } + if (result) { + *result = incref_result ? Py_NewRef(value) : value; + } + return already_present; + } + + /* No space in shared keys. Resize and continue below. 
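As in insertdict(), the resize leaves a combined table behind (see the assert below), so the normal lookup path handles the default insertion.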
*/ + if (insertion_resize(interp, mp, 1) < 0) { + goto error; + } + } + + assert(!_PyDict_HasSplitTable(mp)); + Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &value); if (ix == DKIX_ERROR) { if (result) { @@ -4187,25 +4200,13 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu mp->ma_keys->dk_version = 0; value = default_value; - if (!_PyDict_HasSplitTable(mp)) { - if (insert_combined_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) { - Py_DECREF(key); - Py_DECREF(value); - if (result) { - *result = NULL; - } - return -1; - } - } - else { - if (insert_split_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) { - Py_DECREF(key); - Py_DECREF(value); - if (result) { - *result = NULL; - } - return -1; + if (insert_combined_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) { + Py_DECREF(key); + Py_DECREF(value); + if (result) { + *result = NULL; } + return -1; } MAINTAIN_TRACKING(mp, key, value); @@ -4218,29 +4219,19 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } return 0; } - else if (value == NULL) { - uint64_t new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, default_value); - value = default_value; - assert(_PyDict_HasSplitTable(mp)); - assert(mp->ma_values->values[ix] == NULL); - MAINTAIN_TRACKING(mp, key, value); - mp->ma_values->values[ix] = Py_NewRef(value); - _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); - mp->ma_used++; - mp->ma_version_tag = new_version; - ASSERT_CONSISTENT(mp); - if (result) { - *result = incref_result ? Py_NewRef(value) : value; - } - return 0; - } + assert(value != NULL); ASSERT_CONSISTENT(mp); if (result) { *result = incref_result ? Py_NewRef(value) : value; } return 1; + +error: + if (result) { + *result = NULL; + } + return -1; } int @@ -6644,18 +6635,7 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, assert(hash != -1); } -#ifdef Py_GIL_DISABLED - // Try a thread-safe lookup to see if the index is already allocated - ix = unicodekeys_lookup_unicode_threadsafe(keys, name, hash); - if (ix == DKIX_EMPTY || ix == DKIX_KEY_CHANGED) { - // Lock keys and do insert - LOCK_KEYS(keys); - ix = insert_into_splitdictkeys(keys, name, hash); - UNLOCK_KEYS(keys); - } -#else - ix = insert_into_splitdictkeys(keys, name, hash); -#endif + ix = insert_split_key(keys, name, hash); #ifdef Py_STATS if (ix == DKIX_EMPTY) { From 2e5797eb42c2db2ef7228e96e03339202a9b5257 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 15 Apr 2024 20:06:19 +0000 Subject: [PATCH 03/19] Fix return type --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 341b4f7f6620c2..c8176249af6550 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1639,7 +1639,7 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, return 0; } -static int +static Py_ssize_t insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) { assert(PyUnicode_CheckExact(key)); From 79eb85c6a29bf8c5d141d723de0d91179fd8f785 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 2 Apr 2024 11:29:09 -0700 Subject: [PATCH 04/19] Make instance attributes stored in inline "dict" thread safe --- Include/cpython/object.h | 1 + Include/internal/pycore_dict.h | 19 +- Include/internal/pycore_object.h | 16 +- .../internal/pycore_pyatomic_ft_wrappers.h | 9 + Objects/dictobject.c | 305 ++++++++++++++---- Objects/object.c | 49 ++- Objects/typeobject.c | 22 +- 
Python/bytecodes.c | 15 +- Python/executor_cases.c.h | 10 +- Python/generated_cases.c.h | 13 +- Python/specialize.c | 3 +- Tools/cases_generator/analyzer.py | 1 + 12 files changed, 335 insertions(+), 128 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 2797051281f3b4..a8f57827a964cd 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -493,6 +493,7 @@ do { \ PyAPI_FUNC(void *) PyObject_GetItemData(PyObject *obj); PyAPI_FUNC(int) PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg); +PyAPI_FUNC(void) _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict); PyAPI_FUNC(void) PyObject_ClearManagedDict(PyObject *obj); #define TYPE_MAX_WATCHERS 8 diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index fba0dfc40714ec..f33026dbd6be58 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -1,4 +1,3 @@ - #ifndef Py_INTERNAL_DICT_H #define Py_INTERNAL_DICT_H #ifdef __cplusplus @@ -9,9 +8,10 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#include "pycore_freelist.h" // _PyFreeListState -#include "pycore_identifier.h" // _Py_Identifier -#include "pycore_object.h" // PyManagedDictPointer +#include "pycore_freelist.h" // _PyFreeListState +#include "pycore_identifier.h" // _Py_Identifier +#include "pycore_object.h" // PyManagedDictPointer +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_SSIZE_ACQUIRE // Unsafe flavor of PyDict_GetItemWithError(): no error checking extern PyObject* _PyDict_GetItemWithError(PyObject *dp, PyObject *key); @@ -249,7 +249,7 @@ _PyDict_NotifyEvent(PyInterpreterState *interp, return DICT_NEXT_VERSION(interp) | (mp->ma_version_tag & DICT_WATCHER_AND_MODIFICATION_MASK); } -extern PyDictObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj); +extern PyDictObject *_PyObject_MaterializeManagedDict(PyObject *obj); PyAPI_FUNC(PyObject *)_PyDict_FromItems( PyObject *const *keys, Py_ssize_t keys_offset, @@ -277,7 +277,6 @@ _PyDictValues_AddToInsertionOrder(PyDictValues *values, Py_ssize_t ix) static inline size_t shared_keys_usable_size(PyDictKeysObject *keys) { -#ifdef Py_GIL_DISABLED // dk_usable will decrease for each instance that is created and each // value that is added. dk_nentries will increase for each value that // is added. We want to always return the right value or larger. 
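// The FT_ATOMIC_LOAD_SSIZE_ACQUIRE wrappers used below keep these acquire loads in the free-threaded build and compile to plain reads when the GIL is enabled.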
@@ -285,11 +284,9 @@ shared_keys_usable_size(PyDictKeysObject *keys) // second, and conversely here we read dk_usable first and dk_entries // second (to avoid the case where we read entries before the increment // and read usable after the decrement) - return (size_t)(_Py_atomic_load_ssize_acquire(&keys->dk_usable) + - _Py_atomic_load_ssize_acquire(&keys->dk_nentries)); -#else - return (size_t)keys->dk_nentries + (size_t)keys->dk_usable; -#endif + Py_ssize_t dk_usable = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(keys->dk_usable); + Py_ssize_t dk_nentries = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(keys->dk_nentries); + return dk_nentries + dk_usable; } static inline size_t diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 512f7a35f50e38..8ae83879562e9a 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -12,6 +12,7 @@ extern "C" { #include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() #include "pycore_emscripten_trampoline.h" // _PyCFunction_TrampolineCall() #include "pycore_interp.h" // PyInterpreterState.gc +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED #include "pycore_pystate.h" // _PyInterpreterState_GET() /* Check if an object is consistent. For example, ensure that the reference @@ -659,10 +660,10 @@ extern PyObject* _PyType_GetDocFromInternalDoc(const char *, const char *); extern PyObject* _PyType_GetTextSignatureFromInternalDoc(const char *, const char *, int); void _PyObject_InitInlineValues(PyObject *obj, PyTypeObject *tp); -extern int _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, - PyObject *name, PyObject *value); -PyObject * _PyObject_GetInstanceAttribute(PyObject *obj, PyDictValues *values, - PyObject *name); +extern int _PyObject_TryStoreInstanceAttribute(PyObject *obj, + PyObject *name, PyObject *value); +extern int _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, + PyObject **attr); #ifdef Py_GIL_DISABLED # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) @@ -683,6 +684,13 @@ _PyObject_ManagedDictPointer(PyObject *obj) return (PyManagedDictPointer *)((char *)obj + MANAGED_DICT_OFFSET); } +static inline PyDictObject * +_PyObject_GetManagedDict(PyObject *obj) +{ + PyManagedDictPointer *dorv = _PyObject_ManagedDictPointer(obj); + return (PyDictObject *)FT_ATOMIC_LOAD_PTR_RELAXED(dorv->dict); +} + static inline PyDictValues * _PyObject_InlineValues(PyObject *obj) { diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 2514f51f1b0086..cd049ab3e9972f 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -22,10 +22,16 @@ extern "C" { #ifdef Py_GIL_DISABLED #define FT_ATOMIC_LOAD_PTR(value) _Py_atomic_load_ptr(&value) #define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value) +#define FT_ATOMIC_LOAD_SSIZE_ACQUIRE(value) \ + _Py_atomic_load_ssize_acquire(&value) #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) \ _Py_atomic_load_ssize_relaxed(&value) #define FT_ATOMIC_STORE_PTR(value, new_value) \ _Py_atomic_store_ptr(&value, new_value) +#define FT_ATOMIC_LOAD_PTR_RELAXED(value) \ + _Py_atomic_load_ptr_relaxed(&value) +#define FT_ATOMIC_LOAD_UINT8_RELAXED(value) \ + _Py_atomic_load_uint8_relaxed(&value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ @@ -35,8 +41,11 @@ extern "C" { #else #define FT_ATOMIC_LOAD_PTR(value) 
value #define FT_ATOMIC_LOAD_SSIZE(value) value +#define FT_ATOMIC_LOAD_SSIZE_ACQUIRE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value #define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_PTR_RELAXED(value) value +#define FT_ATOMIC_LOAD_UINT8_RELAXED(value) value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c3516dff973745..e6976f8a534522 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1752,7 +1752,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_MODIFIED, mp, key, value); if (_PyDict_HasSplitTable(mp)) { - mp->ma_values->values[ix] = value; + STORE_SPLIT_VALUE(mp, ix, value); if (old_value == NULL) { _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); mp->ma_used++; @@ -2514,7 +2514,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, mp->ma_version_tag = new_version; if (_PyDict_HasSplitTable(mp)) { assert(old_value == mp->ma_values->values[ix]); - mp->ma_values->values[ix] = NULL; + STORE_SPLIT_VALUE(mp, ix, NULL); assert(ix < SHARED_KEYS_MAX_SIZE); /* Update order */ delete_index_from_values(mp->ma_values, ix); @@ -4226,7 +4226,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu assert(_PyDict_HasSplitTable(mp)); assert(mp->ma_values->values[ix] == NULL); MAINTAIN_TRACKING(mp, key, value); - mp->ma_values->values[ix] = Py_NewRef(value); + STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value)); _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); mp->ma_used++; mp->ma_version_tag = new_version; @@ -6617,27 +6617,54 @@ make_dict_from_instance_attributes(PyInterpreterState *interp, } PyDictObject * -_PyObject_MakeDictFromInstanceAttributes(PyObject *obj) +_PyObject_MaterializeManagedDict(PyObject *obj) { + PyDictObject *dict = _PyObject_GetManagedDict(obj); + if (dict != NULL) { + return dict; + } + + Py_BEGIN_CRITICAL_SECTION(obj); + +#ifdef Py_GIL_DISABLED + dict = _PyObject_GetManagedDict(obj); + if (dict != NULL) { + // We raced with another thread creating the dict + goto exit; + } +#endif + + OBJECT_STAT_INC(dict_materialized_on_request); + PyDictValues *values = _PyObject_InlineValues(obj); PyInterpreterState *interp = _PyInterpreterState_GET(); PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); OBJECT_STAT_INC(dict_materialized_on_request); - return make_dict_from_instance_attributes(interp, keys, values); + dict = make_dict_from_instance_attributes(interp, keys, values); + + FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)dict); + +#ifdef Py_GIL_DISABLED +exit: +#endif + Py_END_CRITICAL_SECTION(); + return dict; } -int -_PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, +static int +store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, PyObject *name, PyObject *value) { - PyInterpreterState *interp = _PyInterpreterState_GET(); PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); assert(keys != NULL); assert(values != NULL); assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_INLINE_VALUES); Py_ssize_t ix = DKIX_EMPTY; + PyDictObject *dict = _PyObject_GetManagedDict(obj); + assert(dict == NULL || ((PyDictObject *)dict)->ma_values == values); if (PyUnicode_CheckExact(name)) { Py_hash_t hash = 
unicode_get_hash(name); if (hash == -1) { @@ -6674,25 +6701,21 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, } #endif } - PyDictObject *dict = _PyObject_ManagedDictPointer(obj)->dict; + if (ix == DKIX_EMPTY) { if (dict == NULL) { - dict = make_dict_from_instance_attributes( - interp, keys, values); + dict = _PyObject_MaterializeManagedDict(obj); if (dict == NULL) { return -1; } - _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)dict; - } - if (value == NULL) { - return PyDict_DelItem((PyObject *)dict, name); - } - else { - return PyDict_SetItem((PyObject *)dict, name, value); } + // Caller needs to insert into materialized dict + return 1; } + PyObject *old_value = values->values[ix]; - values->values[ix] = Py_XNewRef(value); + FT_ATOMIC_STORE_PTR_RELEASE(values->values[ix], Py_XNewRef(value)); + if (old_value == NULL) { if (value == NULL) { PyErr_Format(PyExc_AttributeError, @@ -6719,6 +6742,63 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, return 0; } +int +_PyObject_TryStoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *value) +{ + PyDictValues *values = _PyObject_InlineValues(obj); + if (!FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { + return 1; + } + +#ifdef Py_GIL_DISABLED + int res; + // We have a valid inline values, at least for now... There are two potential + // races with having the values become invalid. One is the dictionary + // being detached from the object. The other is if someone is inserting + // into the dictionary directly and therefore causing it to resize. + // + // If we haven't materialized the dictionary yet we lock on the object, which + // will also be used to prevent the dictionary from being materialized while + // we're doing the insertion. If we race and the dictionary gets created + // then we'll need to release the object lock and lock the dictionary to + // prevent resizing. + PyDictObject *dict = _PyObject_GetManagedDict(obj); + if (dict == NULL) { + int retry_with_dict = 0; + Py_BEGIN_CRITICAL_SECTION(obj); + dict = _PyObject_GetManagedDict(obj); + + if (dict == NULL) { + res = store_instance_attr_lock_held(obj, values, name, value); + } + else { + // We lost a race with the materialization of the dict, we'll + // try the insert with it... + retry_with_dict = 1; + } + Py_END_CRITICAL_SECTION(); + if (retry_with_dict) { + goto with_dict; + } + } + else { +with_dict: + Py_BEGIN_CRITICAL_SECTION(dict); + if (dict->ma_values == values) { + res = store_instance_attr_lock_held(obj, values, name, value); + } + else { + // Caller needs to insert into dict + res = 1; + } + Py_END_CRITICAL_SECTION(); + } + return res; +#else + return store_instance_attr_lock_held(obj, values, name, value); +#endif +} + /* Sanity check for managed dicts */ #if 0 #define CHECK(val) assert(val); if (!(val)) { return 0; } @@ -6750,19 +6830,80 @@ _PyObject_ManagedDictValidityCheck(PyObject *obj) } #endif -PyObject * -_PyObject_GetInstanceAttribute(PyObject *obj, PyDictValues *values, - PyObject *name) +// Attempts to get an instance attribute from the inline values. Returns 0 if +// the lookup from the inline values was successful or 1 if the inline values +// are no longer valid. No error is set in either case. 
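+// When 1 is returned the caller is expected to fall back to the materialized dict.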
+int +_PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr) { assert(PyUnicode_CheckExact(name)); + PyDictValues *values = _PyObject_InlineValues(obj); + if (!FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { + return 1; + } + PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); assert(keys != NULL); Py_ssize_t ix = _PyDictKeys_StringLookup(keys, name); if (ix == DKIX_EMPTY) { - return NULL; + *attr = NULL; + return 0; + } + +#ifdef Py_GIL_DISABLED + PyObject *value = _Py_atomic_load_ptr_relaxed(&values->values[ix]); + if (value == NULL || _Py_TryIncrefCompare(&values->values[ix], value)) { + *attr = value; + return 0; + } + + PyDictObject *dict = _PyObject_GetManagedDict(obj); + if (dict == NULL) { + // No dict, lock the object to prevent one from being + // materialized... + bool success = false; + Py_BEGIN_CRITICAL_SECTION(obj); + + dict = _PyObject_GetManagedDict(obj); + if (dict == NULL) { + // Still no dict, we can read from the values + assert(values->valid); + value = _Py_atomic_load_ptr_relaxed(&values->values[ix]); + *attr = Py_XNewRef(value); + success = true; + } + + Py_END_CRITICAL_SECTION(); + + if (success) { + return 0; + } + } + + // We have a dictionary, we'll need to lock it to prevent + // the values from being resized. + assert(dict != NULL); + int res; + Py_BEGIN_CRITICAL_SECTION(dict); + + if (dict->ma_values == values && + FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { + value = _Py_atomic_load_ptr_relaxed(&values->values[ix]); + *attr = Py_XNewRef(value); + res = 0; + } else { + // Caller needs to lookup from the dictionary + res = 1; } + + Py_END_CRITICAL_SECTION(); + + return res; +#else PyObject *value = values->values[ix]; - return Py_XNewRef(value); + *attr = Py_XNewRef(value); + return 0; +#endif } int @@ -6775,20 +6916,19 @@ _PyObject_IsInstanceDictEmpty(PyObject *obj) PyDictObject *dict; if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictValues *values = _PyObject_InlineValues(obj); - if (values->valid) { + if (FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { PyDictKeysObject *keys = CACHED_KEYS(tp); for (Py_ssize_t i = 0; i < keys->dk_nentries; i++) { - if (values->values[i] != NULL) { + if (FT_ATOMIC_LOAD_PTR_RELAXED(values->values[i]) != NULL) { return 0; } } return 1; } - dict = _PyObject_ManagedDictPointer(obj)->dict; + dict = _PyObject_GetManagedDict(obj); } else if (tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) { - PyManagedDictPointer* managed_dict = _PyObject_ManagedDictPointer(obj); - dict = managed_dict->dict; + dict = _PyObject_GetManagedDict(obj); } else { PyObject **dictptr = _PyObject_ComputedDictPointer(obj); @@ -6820,8 +6960,33 @@ PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) return 0; } +static bool +set_dict_inline_values(PyObject *obj, PyObject *new_dict) +{ + PyDictValues *values = _PyObject_InlineValues(obj); + + if (values->valid) { + for (Py_ssize_t i = 0; i < values->capacity; i++) { + Py_CLEAR(values->values[i]); + } + values->valid = 0; + } + +#ifdef Py_GIL_DISABLED + PyDictObject *dict = _PyObject_ManagedDictPointer(obj)->dict; + if (dict != NULL) { + // We lost a race with materialization of the dict + return false; + } +#endif + + Py_XINCREF(new_dict); + _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)new_dict; + return true; +} + void -PyObject_ClearManagedDict(PyObject *obj) +_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) { assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT); assert(_PyObject_InlineValuesConsistencyCheck(obj)); @@ -6829,29 +6994,61 @@ 
PyObject_ClearManagedDict(PyObject *obj) if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictObject *dict = _PyObject_ManagedDictPointer(obj)->dict; if (dict) { +#ifdef Py_GIL_DISABLED +clear_dict: +#endif + Py_BEGIN_CRITICAL_SECTION2(dict, obj); + _PyDict_DetachFromObject(dict, obj); - _PyObject_ManagedDictPointer(obj)->dict = NULL; + + Py_XINCREF(new_dict); + _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)new_dict; + + Py_END_CRITICAL_SECTION2(); + Py_DECREF(dict); } else { - PyDictValues *values = _PyObject_InlineValues(obj); - if (values->valid) { - for (Py_ssize_t i = 0; i < values->capacity; i++) { - Py_CLEAR(values->values[i]); - } - values->valid = 0; + bool success; + Py_BEGIN_CRITICAL_SECTION(obj); + + success = set_dict_inline_values(obj, new_dict); + + Py_END_CRITICAL_SECTION(); + + (void)success; // suppress warning when GIL isn't disabled + +#ifdef Py_GIL_DISABLED + if (!success) { + dict = _PyObject_ManagedDictPointer(obj)->dict; + assert(dict != NULL); + goto clear_dict; } +#endif } } else { - Py_CLEAR(_PyObject_ManagedDictPointer(obj)->dict); + Py_BEGIN_CRITICAL_SECTION(obj); + + Py_XSETREF(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)Py_XNewRef(new_dict)); + + Py_END_CRITICAL_SECTION(); } assert(_PyObject_InlineValuesConsistencyCheck(obj)); } +void +PyObject_ClearManagedDict(PyObject *obj) +{ + _PyObject_SetManagedDict(obj, NULL); +} + int _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); + assert(_PyObject_ManagedDictPointer(obj)->dict == mp); assert(_PyObject_InlineValuesConsistencyCheck(obj)); if (mp->ma_values == NULL || mp->ma_values != _PyObject_InlineValues(obj)) { @@ -6867,6 +7064,7 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) if (mp->ma_values == NULL) { return -1; } + assert(_PyObject_InlineValuesConsistencyCheck(obj)); ASSERT_CONSISTENT(mp); return 0; @@ -6877,29 +7075,28 @@ PyObject_GenericGetDict(PyObject *obj, void *context) { PyInterpreterState *interp = _PyInterpreterState_GET(); PyTypeObject *tp = Py_TYPE(obj); + PyDictObject *dict; if (_PyType_HasFeature(tp, Py_TPFLAGS_MANAGED_DICT)) { - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(obj); - PyDictObject *dict = managed_dict->dict; + dict = _PyObject_GetManagedDict(obj); if (dict == NULL && (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && - _PyObject_InlineValues(obj)->valid - ) { - PyDictValues *values = _PyObject_InlineValues(obj); - OBJECT_STAT_INC(dict_materialized_on_request); - dict = make_dict_from_instance_attributes( - interp, CACHED_KEYS(tp), values); - if (dict != NULL) { - managed_dict->dict = (PyDictObject *)dict; - } + _PyObject_InlineValues(obj)->valid) { + dict = _PyObject_MaterializeManagedDict(obj); } - else { - dict = managed_dict->dict; + else if (dict == NULL) { + Py_BEGIN_CRITICAL_SECTION(obj); + + // Check again that we're not racing with someone else creating the dict + dict = _PyObject_GetManagedDict(obj); if (dict == NULL) { - dictkeys_incref(CACHED_KEYS(tp)); OBJECT_STAT_INC(dict_materialized_on_request); + dictkeys_incref(CACHED_KEYS(tp)); dict = (PyDictObject *)new_dict_with_shared_keys(interp, CACHED_KEYS(tp)); - managed_dict->dict = (PyDictObject *)dict; + FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)dict); } + + Py_END_CRITICAL_SECTION(); } return Py_XNewRef((PyObject *)dict); } @@ -7109,7 +7306,7 @@ _PyObject_InlineValuesConsistencyCheck(PyObject *obj) return 1; } assert(Py_TYPE(obj)->tp_flags & 
Py_TPFLAGS_MANAGED_DICT); - PyDictObject *dict = (PyDictObject *)_PyObject_ManagedDictPointer(obj)->dict; + PyDictObject *dict = _PyObject_GetManagedDict(obj); if (dict == NULL) { return 1; } diff --git a/Objects/object.c b/Objects/object.c index 214e7c5b567928..4b12e8d1525ef0 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -6,6 +6,7 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _Py_EnterRecursiveCallTstate() #include "pycore_context.h" // _PyContextTokenMissing_Type +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION, Py_END_CRITICAL_SECTION #include "pycore_descrobject.h" // _PyMethodWrapper_Type #include "pycore_dict.h" // _PyObject_MakeDictFromInstanceAttributes() #include "pycore_floatobject.h" // _PyFloat_DebugMallocStats() @@ -25,6 +26,7 @@ #include "pycore_typevarobject.h" // _PyTypeAlias_Type, _Py_initialize_generic #include "pycore_unionobject.h" // _PyUnion_Type + #ifdef Py_LIMITED_API // Prevent recursive call _Py_IncRef() <=> Py_INCREF() # error "Py_LIMITED_API macro must not be defined" @@ -1403,16 +1405,15 @@ _PyObject_GetDictPtr(PyObject *obj) if ((Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) { return _PyObject_ComputedDictPointer(obj); } - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(obj); - if (managed_dict->dict == NULL && Py_TYPE(obj)->tp_flags & Py_TPFLAGS_INLINE_VALUES) { - PyDictObject *dict = (PyDictObject *)_PyObject_MakeDictFromInstanceAttributes(obj); + PyDictObject *dict = _PyObject_GetManagedDict(obj); + if (dict == NULL && Py_TYPE(obj)->tp_flags & Py_TPFLAGS_INLINE_VALUES) { + dict = _PyObject_MaterializeManagedDict(obj); if (dict == NULL) { PyErr_Clear(); return NULL; } - managed_dict->dict = dict; } - return (PyObject **)&managed_dict->dict; + return (PyObject **)&_PyObject_ManagedDictPointer(obj)->dict; } PyObject * @@ -1480,10 +1481,9 @@ _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) } } } - PyObject *dict; - if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && _PyObject_InlineValues(obj)->valid) { - PyDictValues *values = _PyObject_InlineValues(obj); - PyObject *attr = _PyObject_GetInstanceAttribute(obj, values, name); + PyObject *dict, *attr; + if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && + !_PyObject_TryGetInstanceAttribute(obj, name, &attr)) { if (attr != NULL) { *method = attr; Py_XDECREF(descr); @@ -1492,8 +1492,7 @@ _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) dict = NULL; } else if ((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT)) { - PyManagedDictPointer* managed_dict = _PyObject_ManagedDictPointer(obj); - dict = (PyObject *)managed_dict->dict; + dict = (PyObject *)_PyObject_GetManagedDict(obj); } else { PyObject **dictptr = _PyObject_ComputedDictPointer(obj); @@ -1586,26 +1585,23 @@ _PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, } } if (dict == NULL) { - if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && _PyObject_InlineValues(obj)->valid) { - PyDictValues *values = _PyObject_InlineValues(obj); - if (PyUnicode_CheckExact(name)) { - res = _PyObject_GetInstanceAttribute(obj, values, name); + if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES)) { + if (PyUnicode_CheckExact(name) && + !_PyObject_TryGetInstanceAttribute(obj, name, &res)) { if (res != NULL) { goto done; } } else { - dict = (PyObject *)_PyObject_MakeDictFromInstanceAttributes(obj); + dict = (PyObject *)_PyObject_MaterializeManagedDict(obj); if (dict == NULL) { res = NULL; goto done; } - _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject 
*)dict; } } else if ((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT)) { - PyManagedDictPointer* managed_dict = _PyObject_ManagedDictPointer(obj); - dict = (PyObject *)managed_dict->dict; + dict = (PyObject *)_PyObject_GetManagedDict(obj); } else { PyObject **dictptr = _PyObject_ComputedDictPointer(obj); @@ -1700,12 +1696,15 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, if (dict == NULL) { PyObject **dictptr; - if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && _PyObject_InlineValues(obj)->valid) { - res = _PyObject_StoreInstanceAttribute( - obj, _PyObject_InlineValues(obj), name, value); - goto error_check; + + if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES)) { + res = _PyObject_TryStoreInstanceAttribute(obj, name, value); + if (res <= 0) { + goto error_check; + } } - else if ((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT)) { + + if ((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT)) { PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(obj); dictptr = (PyObject **)&managed_dict->dict; } @@ -1779,7 +1778,7 @@ PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context) PyObject **dictptr = _PyObject_GetDictPtr(obj); if (dictptr == NULL) { if (_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_INLINE_VALUES) && - _PyObject_ManagedDictPointer(obj)->dict == NULL + _PyObject_GetManagedDict(obj) == NULL ) { /* Was unable to convert to dict */ PyErr_NoMemory(); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2f356388785665..fc82b6ae6dfc4d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3165,9 +3165,9 @@ subtype_setdict(PyObject *obj, PyObject *value, void *context) "not a '%.200s'", Py_TYPE(value)->tp_name); return -1; } + if (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) { - PyObject_ClearManagedDict(obj); - _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)Py_XNewRef(value); + _PyObject_SetManagedDict(obj, value); } else { dictptr = _PyObject_ComputedDictPointer(obj); @@ -6194,15 +6194,17 @@ object_set_class(PyObject *self, PyObject *value, void *closure) /* Changing the class will change the implicit dict keys, * so we must materialize the dictionary first. 
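* The dict is then detached from the object under a critical section held on both the object and the dict, so the inline values cannot change concurrently while the keys are swapped.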
*/ if (oldto->tp_flags & Py_TPFLAGS_INLINE_VALUES) { - PyDictObject *dict = _PyObject_ManagedDictPointer(self)->dict; - if (dict == NULL) { - dict = (PyDictObject *)_PyObject_MakeDictFromInstanceAttributes(self); - if (dict == NULL) { - return -1; - } - _PyObject_ManagedDictPointer(self)->dict = dict; + PyDictObject *dict = _PyObject_MaterializeManagedDict(self); + bool error = false; + + Py_BEGIN_CRITICAL_SECTION2(self, dict); + + if (dict == NULL || _PyDict_DetachFromObject(dict, self)) { + error = true; } - if (_PyDict_DetachFromObject(dict, self)) { + + Py_END_CRITICAL_SECTION2(); + if (error) { return -1; } } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c34d702f06418e..d2b8a2efb47dff 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1945,15 +1945,13 @@ dummy_func( op(_CHECK_ATTR_WITH_HINT, (owner -- owner)) { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(owner); - PyDictObject *dict = managed_dict->dict; + PyDictObject *dict = _PyObject_GetManagedDict(owner); DEOPT_IF(dict == NULL); assert(PyDict_CheckExact((PyObject *)dict)); } op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr, null if (oparg & 1))) { - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(owner); - PyDictObject *dict = managed_dict->dict; + PyDictObject *dict = _PyObject_GetManagedDict(owner); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); if (DK_IS_UNICODE(dict->ma_keys)) { @@ -2070,14 +2068,15 @@ dummy_func( op(_GUARD_DORV_NO_DICT, (owner -- owner)) { assert(Py_TYPE(owner)->tp_dictoffset < 0); assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_INLINE_VALUES); - DEOPT_IF(_PyObject_ManagedDictPointer(owner)->dict); + DEOPT_IF(_PyObject_GetManagedDict(owner)); DEOPT_IF(_PyObject_InlineValues(owner)->valid == 0); } op(_STORE_ATTR_INSTANCE_VALUE, (index/1, value, owner --)) { STAT_INC(STORE_ATTR, hit); - assert(_PyObject_ManagedDictPointer(owner)->dict == NULL); + assert(_PyObject_GetManagedDict(owner) == NULL); PyDictValues *values = _PyObject_InlineValues(owner); + PyObject *old_value = values->values[index]; values->values[index] = value; if (old_value == NULL) { @@ -2086,6 +2085,7 @@ dummy_func( else { Py_DECREF(old_value); } + Py_DECREF(owner); } @@ -2100,8 +2100,7 @@ dummy_func( assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(owner); - PyDictObject *dict = managed_dict->dict; + PyDictObject *dict = _PyObject_GetManagedDict(owner); DEOPT_IF(dict == NULL); assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fccff24a418586..798c2303b6fb25 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1998,8 +1998,7 @@ PyObject *owner; owner = stack_pointer[-1]; assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(owner); - PyDictObject *dict = managed_dict->dict; + PyDictObject *dict = _PyObject_GetManagedDict(owner); if (dict == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -2015,8 +2014,7 @@ oparg = CURRENT_OPARG(); owner = stack_pointer[-1]; uint16_t hint = (uint16_t)CURRENT_OPERAND(); - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(owner); - PyDictObject *dict = 
managed_dict->dict; + PyDictObject *dict = _PyObject_GetManagedDict(owner); if (hint >= (size_t)dict->ma_keys->dk_nentries) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -2159,7 +2157,7 @@ owner = stack_pointer[-1]; assert(Py_TYPE(owner)->tp_dictoffset < 0); assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_INLINE_VALUES); - if (_PyObject_ManagedDictPointer(owner)->dict) { + if (_PyObject_GetManagedDict(owner)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -2177,7 +2175,7 @@ value = stack_pointer[-2]; uint16_t index = (uint16_t)CURRENT_OPERAND(); STAT_INC(STORE_ATTR, hit); - assert(_PyObject_ManagedDictPointer(owner)->dict == NULL); + assert(_PyObject_GetManagedDict(owner) == NULL); PyDictValues *values = _PyObject_InlineValues(owner); PyObject *old_value = values->values[index]; values->values[index] = value; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a7764b0ec12e10..e9086ecb03786e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4010,16 +4010,14 @@ // _CHECK_ATTR_WITH_HINT { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(owner); - PyDictObject *dict = managed_dict->dict; + PyDictObject *dict = _PyObject_GetManagedDict(owner); DEOPT_IF(dict == NULL, LOAD_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); } // _LOAD_ATTR_WITH_HINT { uint16_t hint = read_u16(&this_instr[4].cache); - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(owner); - PyDictObject *dict = managed_dict->dict; + PyDictObject *dict = _PyObject_GetManagedDict(owner); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, LOAD_ATTR); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); if (DK_IS_UNICODE(dict->ma_keys)) { @@ -5301,7 +5299,7 @@ { assert(Py_TYPE(owner)->tp_dictoffset < 0); assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_INLINE_VALUES); - DEOPT_IF(_PyObject_ManagedDictPointer(owner)->dict, STORE_ATTR); + DEOPT_IF(_PyObject_GetManagedDict(owner), STORE_ATTR); DEOPT_IF(_PyObject_InlineValues(owner)->valid == 0, STORE_ATTR); } // _STORE_ATTR_INSTANCE_VALUE @@ -5309,7 +5307,7 @@ { uint16_t index = read_u16(&this_instr[4].cache); STAT_INC(STORE_ATTR, hit); - assert(_PyObject_ManagedDictPointer(owner)->dict == NULL); + assert(_PyObject_GetManagedDict(owner) == NULL); PyDictValues *values = _PyObject_InlineValues(owner); PyObject *old_value = values->values[index]; values->values[index] = value; @@ -5372,8 +5370,7 @@ assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(owner); - PyDictObject *dict = managed_dict->dict; + PyDictObject *dict = _PyObject_GetManagedDict(owner); DEOPT_IF(dict == NULL, STORE_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); diff --git a/Python/specialize.c b/Python/specialize.c index 5e14bb56b30036..ee51781372166a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -852,8 +852,7 @@ specialize_dict_access( instr->op.code = values_op; } else { - PyManagedDictPointer *managed_dict = _PyObject_ManagedDictPointer(owner); - PyDictObject *dict = managed_dict->dict; + PyDictObject *dict = _PyObject_GetManagedDict(owner); if (dict == NULL || !PyDict_CheckExact(dict)) { SPECIALIZATION_FAIL(base_op, SPEC_FAIL_NO_DICT); return 0; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 
d17b2b9b024b99..18cefa08328804 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -354,6 +354,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: NON_ESCAPING_FUNCTIONS = ( "Py_INCREF", "_PyManagedDictPointer_IsValues", + "_PyObject_GetManagedDict", "_PyObject_ManagedDictPointer", "_PyObject_InlineValues", "_PyDictValues_AddToInsertionOrder", From 68fa24382a98b2b7916da69ab2e80925165af873 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 4 Apr 2024 15:48:26 -0700 Subject: [PATCH 05/19] Insert into dict on store if dict is materialized and other feedback --- Objects/dictobject.c | 108 +++++++++++++++++++++++++++++-------------- Objects/object.c | 4 +- 2 files changed, 75 insertions(+), 37 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index e6976f8a534522..547396fc40b9f9 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -6616,6 +6616,23 @@ make_dict_from_instance_attributes(PyInterpreterState *interp, return res; } +static PyDictObject * +materialize_managed_dict_lock_held(PyObject *obj) +{ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); + + OBJECT_STAT_INC(dict_materialized_on_request); + + PyDictValues *values = _PyObject_InlineValues(obj); + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); + OBJECT_STAT_INC(dict_materialized_on_request); + PyDictObject *dict = make_dict_from_instance_attributes(interp, keys, values); + FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)dict); + return dict; +} + PyDictObject * _PyObject_MaterializeManagedDict(PyObject *obj) { @@ -6633,17 +6650,7 @@ _PyObject_MaterializeManagedDict(PyObject *obj) goto exit; } #endif - - OBJECT_STAT_INC(dict_materialized_on_request); - - PyDictValues *values = _PyObject_InlineValues(obj); - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); - OBJECT_STAT_INC(dict_materialized_on_request); - dict = make_dict_from_instance_attributes(interp, keys, values); - - FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, - (PyDictObject *)dict); + dict = materialize_managed_dict_lock_held(obj); #ifdef Py_GIL_DISABLED exit: @@ -6652,8 +6659,9 @@ _PyObject_MaterializeManagedDict(PyObject *obj) return dict; } - - +// Called with either the object's lock or the dict's lock held +// depending on whether or not a dict has been materialized for +// the object. static int store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, PyObject *name, PyObject *value) @@ -6703,14 +6711,38 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, } if (ix == DKIX_EMPTY) { + int res; if (dict == NULL) { - dict = _PyObject_MaterializeManagedDict(obj); + // Make the dict but don't publish it in the object + // so that no one else will see it. 
+ dict = materialize_managed_dict_lock_held(obj); if (dict == NULL) { return -1; } + + if (value == NULL) { + res = PyDict_DelItem((PyObject *)dict, name); + } else { + res = PyDict_SetItem((PyObject *)dict, name, value); + } + + return res; } - // Caller needs to insert into materialized dict - return 1; + + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(dict); + + if (value == NULL) { + Py_hash_t hash; + if (!PyUnicode_CheckExact(name) || (hash = unicode_get_hash(name)) == -1) { + hash = PyObject_Hash(name); + if (hash == -1) + return -1; + } + res = delitem_knownhash_lock_held((PyObject *)dict, name, hash); + } else { + res = setitem_lock_held(dict, name, value); + } + return res; } PyObject *old_value = values->values[ix]; @@ -6742,12 +6774,32 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, return 0; } +static inline int +store_instance_attr_dict(PyObject *obj, PyDictObject *dict, PyObject *name, PyObject *value) +{ + PyDictValues *values = _PyObject_InlineValues(obj); + int res; + Py_BEGIN_CRITICAL_SECTION(dict); + if (dict->ma_values == values) { + res = store_instance_attr_lock_held(obj, values, name, value); + } + else { + if (value == NULL) { + res = PyDict_DelItem((PyObject *)dict, name); + } else { + res = PyDict_SetItem((PyObject *)dict, name, value); + } + } + Py_END_CRITICAL_SECTION(); + return res; +} + int _PyObject_TryStoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *value) { PyDictValues *values = _PyObject_InlineValues(obj); if (!FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { - return 1; + return store_instance_attr_dict(obj, _PyObject_GetManagedDict(obj), name, value); } #ifdef Py_GIL_DISABLED @@ -6764,7 +6816,6 @@ _PyObject_TryStoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *val // prevent resizing. PyDictObject *dict = _PyObject_GetManagedDict(obj); if (dict == NULL) { - int retry_with_dict = 0; Py_BEGIN_CRITICAL_SECTION(obj); dict = _PyObject_GetManagedDict(obj); @@ -6774,24 +6825,13 @@ _PyObject_TryStoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *val else { // We lost a race with the materialization of the dict, we'll // try the insert with it... 
- retry_with_dict = 1; - } - Py_END_CRITICAL_SECTION(); - if (retry_with_dict) { goto with_dict; } + Py_END_CRITICAL_SECTION(); } else { with_dict: - Py_BEGIN_CRITICAL_SECTION(dict); - if (dict->ma_values == values) { - res = store_instance_attr_lock_held(obj, values, name, value); - } - else { - // Caller needs to insert into dict - res = 1; - } - Py_END_CRITICAL_SECTION(); + res = store_instance_attr_dict(obj, dict, name, value); } return res; #else @@ -6868,7 +6908,7 @@ _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr if (dict == NULL) { // Still no dict, we can read from the values assert(values->valid); - value = _Py_atomic_load_ptr_relaxed(&values->values[ix]); + value = values->values[ix]; *attr = Py_XNewRef(value); success = true; } @@ -7057,10 +7097,10 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) assert(mp->ma_values->embedded == 1); assert(mp->ma_values->valid == 1); assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_INLINE_VALUES); - Py_BEGIN_CRITICAL_SECTION(mp); + mp->ma_values = copy_values(mp->ma_values); _PyObject_InlineValues(obj)->valid = 0; - Py_END_CRITICAL_SECTION(); + if (mp->ma_values == NULL) { return -1; } diff --git a/Objects/object.c b/Objects/object.c index 4b12e8d1525ef0..8f455484dce33d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1699,9 +1699,7 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES)) { res = _PyObject_TryStoreInstanceAttribute(obj, name, value); - if (res <= 0) { - goto error_check; - } + goto error_check; } if ((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT)) { From 6d37f80932196d9f537391c966dd8d2af04c3d27 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 15 Apr 2024 13:27:08 -0700 Subject: [PATCH 06/19] _PyObject_TryStoreInstanceAttribute -> _PyObject_StoreInstanceAttribute Fix issue where critical section isn't released Make _PyObject_TryGetInstanceAttribute return a bool --- Include/internal/pycore_object.h | 6 +- .../internal/pycore_pyatomic_ft_wrappers.h | 3 + Objects/dictobject.c | 65 +++++++++---------- Objects/object.c | 6 +- Objects/typeobject.c | 2 +- 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 8ae83879562e9a..cdbdefa26cc200 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -660,10 +660,10 @@ extern PyObject* _PyType_GetDocFromInternalDoc(const char *, const char *); extern PyObject* _PyType_GetTextSignatureFromInternalDoc(const char *, const char *, int); void _PyObject_InitInlineValues(PyObject *obj, PyTypeObject *tp); -extern int _PyObject_TryStoreInstanceAttribute(PyObject *obj, +extern int _PyObject_StoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *value); -extern int _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, - PyObject **attr); +extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, + PyObject **attr); #ifdef Py_GIL_DISABLED # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index cd049ab3e9972f..467bf3f4ef5958 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -32,6 +32,8 @@ extern "C" { _Py_atomic_load_ptr_relaxed(&value) #define FT_ATOMIC_LOAD_UINT8_RELAXED(value) \ _Py_atomic_load_uint8_relaxed(&value) +#define 
FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ + _Py_atomic_store_uint8_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ @@ -46,6 +48,7 @@ extern "C" { #define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value #define FT_ATOMIC_LOAD_PTR_RELAXED(value) value #define FT_ATOMIC_LOAD_UINT8_RELAXED(value) value +#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 547396fc40b9f9..adaafadfcd1502 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -6629,7 +6629,7 @@ materialize_managed_dict_lock_held(PyObject *obj) OBJECT_STAT_INC(dict_materialized_on_request); PyDictObject *dict = make_dict_from_instance_attributes(interp, keys, values); FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, - (PyDictObject *)dict); + (PyDictObject *)dict); return dict; } @@ -6713,8 +6713,6 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, if (ix == DKIX_EMPTY) { int res; if (dict == NULL) { - // Make the dict but don't publish it in the object - // so that no one else will see it. dict = materialize_managed_dict_lock_held(obj); if (dict == NULL) { return -1; @@ -6795,7 +6793,7 @@ store_instance_attr_dict(PyObject *obj, PyDictObject *dict, PyObject *name, PyOb } int -_PyObject_TryStoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *value) +_PyObject_StoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *value) { PyDictValues *values = _PyObject_InlineValues(obj); if (!FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { @@ -6803,7 +6801,6 @@ _PyObject_TryStoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *val } #ifdef Py_GIL_DISABLED - int res; // We have a valid inline values, at least for now... There are two potential // races with having the values become invalid. One is the dictionary // being detached from the object. The other is if someone is inserting @@ -6816,24 +6813,20 @@ _PyObject_TryStoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *val // prevent resizing. PyDictObject *dict = _PyObject_GetManagedDict(obj); if (dict == NULL) { + int res; Py_BEGIN_CRITICAL_SECTION(obj); dict = _PyObject_GetManagedDict(obj); if (dict == NULL) { res = store_instance_attr_lock_held(obj, values, name, value); } - else { - // We lost a race with the materialization of the dict, we'll - // try the insert with it... - goto with_dict; - } Py_END_CRITICAL_SECTION(); + + if (dict == NULL) { + return res; + } } - else { -with_dict: - res = store_instance_attr_dict(obj, dict, name, value); - } - return res; + return store_instance_attr_dict(obj, dict, name, value); #else return store_instance_attr_lock_held(obj, values, name, value); #endif @@ -6873,13 +6866,13 @@ _PyObject_ManagedDictValidityCheck(PyObject *obj) // Attempts to get an instance attribute from the inline values. Returns 0 if // the lookup from the inline values was successful or 1 if the inline values // are no longer valid. No error is set in either case. 
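// (With the bool return type introduced below, true means the inline lookup answered and false means the caller must consult the materialized dict.)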
-int +bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr) { assert(PyUnicode_CheckExact(name)); PyDictValues *values = _PyObject_InlineValues(obj); if (!FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { - return 1; + return false; } PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); @@ -6887,14 +6880,14 @@ _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr Py_ssize_t ix = _PyDictKeys_StringLookup(keys, name); if (ix == DKIX_EMPTY) { *attr = NULL; - return 0; + return true; } #ifdef Py_GIL_DISABLED PyObject *value = _Py_atomic_load_ptr_relaxed(&values->values[ix]); if (value == NULL || _Py_TryIncrefCompare(&values->values[ix], value)) { *attr = value; - return 0; + return true; } PyDictObject *dict = _PyObject_GetManagedDict(obj); @@ -6916,33 +6909,34 @@ _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr Py_END_CRITICAL_SECTION(); if (success) { - return 0; + return true; } } // We have a dictionary, we'll need to lock it to prevent // the values from being resized. assert(dict != NULL); - int res; + + bool success; Py_BEGIN_CRITICAL_SECTION(dict); if (dict->ma_values == values && FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { value = _Py_atomic_load_ptr_relaxed(&values->values[ix]); *attr = Py_XNewRef(value); - res = 0; + success = true; } else { // Caller needs to lookup from the dictionary - res = 1; + success = false; } Py_END_CRITICAL_SECTION(); - return res; + return success; #else PyObject *value = values->values[ix]; *attr = Py_XNewRef(value); - return 0; + return true; #endif } @@ -7003,14 +6997,9 @@ PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) static bool set_dict_inline_values(PyObject *obj, PyObject *new_dict) { - PyDictValues *values = _PyObject_InlineValues(obj); + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); - if (values->valid) { - for (Py_ssize_t i = 0; i < values->capacity; i++) { - Py_CLEAR(values->values[i]); - } - values->valid = 0; - } + PyDictValues *values = _PyObject_InlineValues(obj); #ifdef Py_GIL_DISABLED PyDictObject *dict = _PyObject_ManagedDictPointer(obj)->dict; @@ -7022,6 +7011,14 @@ set_dict_inline_values(PyObject *obj, PyObject *new_dict) Py_XINCREF(new_dict); _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)new_dict; + + if (values->valid) { + FT_ATOMIC_STORE_UINT8_RELAXED(values->valid, 0); + for (Py_ssize_t i = 0; i < values->capacity; i++) { + Py_CLEAR(values->values[i]); + } + } + return true; } @@ -7032,7 +7029,7 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) assert(_PyObject_InlineValuesConsistencyCheck(obj)); PyTypeObject *tp = Py_TYPE(obj); if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { - PyDictObject *dict = _PyObject_ManagedDictPointer(obj)->dict; + PyDictObject *dict = _PyObject_GetManagedDict(obj); if (dict) { #ifdef Py_GIL_DISABLED clear_dict: @@ -7120,7 +7117,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) dict = _PyObject_GetManagedDict(obj); if (dict == NULL && (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && - _PyObject_InlineValues(obj)->valid) { + FT_ATOMIC_LOAD_UINT8_RELAXED(_PyObject_InlineValues(obj)->valid)) { dict = _PyObject_MaterializeManagedDict(obj); } else if (dict == NULL) { diff --git a/Objects/object.c b/Objects/object.c index 8f455484dce33d..bba8baff010a94 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1483,7 +1483,7 @@ _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) } PyObject *dict, *attr; if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && - 
!_PyObject_TryGetInstanceAttribute(obj, name, &attr)) { + _PyObject_TryGetInstanceAttribute(obj, name, &attr)) { if (attr != NULL) { *method = attr; Py_XDECREF(descr); @@ -1587,7 +1587,7 @@ _PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, if (dict == NULL) { if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES)) { if (PyUnicode_CheckExact(name) && - !_PyObject_TryGetInstanceAttribute(obj, name, &res)) { + _PyObject_TryGetInstanceAttribute(obj, name, &res)) { if (res != NULL) { goto done; } @@ -1698,7 +1698,7 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, PyObject **dictptr; if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES)) { - res = _PyObject_TryStoreInstanceAttribute(obj, name, value); + res = _PyObject_StoreInstanceAttribute(obj, name, value); goto error_check; } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index fc82b6ae6dfc4d..3690ba10dd6626 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6199,7 +6199,7 @@ object_set_class(PyObject *self, PyObject *value, void *closure) Py_BEGIN_CRITICAL_SECTION2(self, dict); - if (dict == NULL || _PyDict_DetachFromObject(dict, self)) { + if (dict == NULL || _PyDict_DetachFromObject(dict, self) < 0) { error = true; } From 91e82d6b8d861efe8410d145a22c34883b4daf7e Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 16 Apr 2024 16:46:29 -0700 Subject: [PATCH 07/19] Cleanup _PyObject_SetManagedDict, use cst for inline valid flag, fix issue w/ deleted dict --- .../internal/pycore_pyatomic_ft_wrappers.h | 14 ++- Lib/test/test_class.py | 9 ++ Objects/dictobject.c | 114 ++++++++++-------- Objects/typeobject.c | 12 +- 4 files changed, 93 insertions(+), 56 deletions(-) diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 467bf3f4ef5958..9d897aff601975 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -21,6 +21,7 @@ extern "C" { #ifdef Py_GIL_DISABLED #define FT_ATOMIC_LOAD_PTR(value) _Py_atomic_load_ptr(&value) +#define FT_ATOMIC_STORE_PTR(value, new_value) _Py_atomic_store_ptr(&value, new_value) #define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value) #define FT_ATOMIC_LOAD_SSIZE_ACQUIRE(value) \ _Py_atomic_load_ssize_acquire(&value) @@ -30,10 +31,10 @@ extern "C" { _Py_atomic_store_ptr(&value, new_value) #define FT_ATOMIC_LOAD_PTR_RELAXED(value) \ _Py_atomic_load_ptr_relaxed(&value) -#define FT_ATOMIC_LOAD_UINT8_RELAXED(value) \ - _Py_atomic_load_uint8_relaxed(&value) -#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ - _Py_atomic_store_uint8_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_UINT8(value) \ + _Py_atomic_load_uint8(&value) +#define FT_ATOMIC_STORE_UINT8(value, new_value) \ + _Py_atomic_store_uint8(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ @@ -42,13 +43,14 @@ extern "C" { _Py_atomic_store_ssize_relaxed(&value, new_value) #else #define FT_ATOMIC_LOAD_PTR(value) value +#define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_ACQUIRE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value #define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value #define FT_ATOMIC_LOAD_PTR_RELAXED(value) value -#define FT_ATOMIC_LOAD_UINT8_RELAXED(value) value -#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value +#define 
FT_ATOMIC_LOAD_UINT8(value) value +#define FT_ATOMIC_STORE_UINT8(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value diff --git a/Lib/test/test_class.py b/Lib/test/test_class.py index a9cfd8df691845..5885db84b66b01 100644 --- a/Lib/test/test_class.py +++ b/Lib/test/test_class.py @@ -873,6 +873,15 @@ def __init__(self): obj.foo = None # Aborted here self.assertEqual(obj.__dict__, {"foo":None}) + def test_store_attr_deleted_dict(self): + class Foo: + pass + + f = Foo() + del f.__dict__ + f.a = 3 + self.assertEqual(f.a, 3) + if __name__ == '__main__': unittest.main() diff --git a/Objects/dictobject.c b/Objects/dictobject.c index adaafadfcd1502..f773abeda4cf44 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -6796,8 +6796,15 @@ int _PyObject_StoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *value) { PyDictValues *values = _PyObject_InlineValues(obj); - if (!FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { - return store_instance_attr_dict(obj, _PyObject_GetManagedDict(obj), name, value); + if (!FT_ATOMIC_LOAD_UINT8(values->valid)) { + PyDictObject *dict = _PyObject_GetManagedDict(obj); + if (dict == NULL) { + dict = (PyDictObject *)PyObject_GenericGetDict(obj, NULL); + if (dict == NULL) { + return -1; + } + } + return store_instance_attr_dict(obj, dict, name, value); } #ifdef Py_GIL_DISABLED @@ -6871,7 +6878,7 @@ _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr { assert(PyUnicode_CheckExact(name)); PyDictValues *values = _PyObject_InlineValues(obj); - if (!FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { + if (!FT_ATOMIC_LOAD_UINT8(values->valid)) { return false; } @@ -6920,8 +6927,7 @@ _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr bool success; Py_BEGIN_CRITICAL_SECTION(dict); - if (dict->ma_values == values && - FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { + if (dict->ma_values == values && FT_ATOMIC_LOAD_UINT8(values->valid)) { value = _Py_atomic_load_ptr_relaxed(&values->values[ix]); *attr = Py_XNewRef(value); success = true; @@ -6950,7 +6956,7 @@ _PyObject_IsInstanceDictEmpty(PyObject *obj) PyDictObject *dict; if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictValues *values = _PyObject_InlineValues(obj); - if (FT_ATOMIC_LOAD_UINT8_RELAXED(values->valid)) { + if (FT_ATOMIC_LOAD_UINT8(values->valid)) { PyDictKeysObject *keys = CACHED_KEYS(tp); for (Py_ssize_t i = 0; i < keys->dk_nentries; i++) { if (FT_ATOMIC_LOAD_PTR_RELAXED(values->values[i]) != NULL) { @@ -6994,32 +7000,22 @@ PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) return 0; } -static bool -set_dict_inline_values(PyObject *obj, PyObject *new_dict) +static void +set_dict_inline_values(PyObject *obj, PyDictObject *new_dict) { _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); PyDictValues *values = _PyObject_InlineValues(obj); -#ifdef Py_GIL_DISABLED - PyDictObject *dict = _PyObject_ManagedDictPointer(obj)->dict; - if (dict != NULL) { - // We lost a race with materialization of the dict - return false; - } -#endif - Py_XINCREF(new_dict); - _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)new_dict; + FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, new_dict); if (values->valid) { - FT_ATOMIC_STORE_UINT8_RELAXED(values->valid, 0); + FT_ATOMIC_STORE_UINT8(values->valid, 0); for (Py_ssize_t i = 0; i < 
values->capacity; i++) { Py_CLEAR(values->values[i]); } } - - return true; } void @@ -7030,47 +7026,67 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) PyTypeObject *tp = Py_TYPE(obj); if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictObject *dict = _PyObject_GetManagedDict(obj); - if (dict) { + if (dict == NULL) { #ifdef Py_GIL_DISABLED -clear_dict: -#endif - Py_BEGIN_CRITICAL_SECTION2(dict, obj); - - _PyDict_DetachFromObject(dict, obj); + Py_BEGIN_CRITICAL_SECTION(obj); - Py_XINCREF(new_dict); - _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)new_dict; + dict = _PyObject_ManagedDictPointer(obj)->dict; + if (dict == NULL) { + set_dict_inline_values(obj, (PyDictObject *)new_dict); + } - Py_END_CRITICAL_SECTION2(); + Py_END_CRITICAL_SECTION(); - Py_DECREF(dict); + if (dict == NULL) { + return; + } +#else + set_dict_inline_values(obj, (PyDictObject *)new_dict); + return; +#endif } - else { - bool success; - Py_BEGIN_CRITICAL_SECTION(obj); - success = set_dict_inline_values(obj, new_dict); + Py_BEGIN_CRITICAL_SECTION2(dict, obj); - Py_END_CRITICAL_SECTION(); + // If the dict in the object has been replaced between when we + // got the dict and unlocked the objects then it's + // definitely no longer inline and there's no need to detach + // it, we can just replace it. + PyDictObject *cur_dict = _PyObject_ManagedDictPointer(obj)->dict; + assert(cur_dict == dict || + (cur_dict->ma_values != _PyObject_InlineValues(obj) && + !_PyObject_InlineValues(obj)->valid)); - (void)success; // suppress warning when GIL isn't disabled + Py_XINCREF(new_dict); + FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)Py_XNewRef(new_dict)); -#ifdef Py_GIL_DISABLED - if (!success) { - dict = _PyObject_ManagedDictPointer(obj)->dict; - assert(dict != NULL); - goto clear_dict; - } -#endif + // If we got a replacement dict after locking the object and the dict + // then the old dict had to already have been detached. 
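/*
 * A minimal stand-alone pthread/C11 sketch of the re-check-after-locking
 * pattern this hunk relies on: the dict pointer is read before the locks
 * are taken, so once the locks are held the code must confirm the dict is
 * still the object's current one before detaching it.  The names below
 * (object_t, node_t, detach, replace_current) are illustrative only, not
 * CPython APIs.
 */
#include <pthread.h>
#include <stdatomic.h>

typedef struct node node_t;

typedef struct {
    pthread_mutex_t lock;
    _Atomic(node_t *) current;       /* may be swapped by other threads */
} object_t;

static void
detach(object_t *obj, node_t *n)
{
    /* A real detach would copy n's contents out of the object; this stub
       only marks the call site. */
    (void)obj;
    (void)n;
}

static void
replace_current(object_t *obj, node_t *new_node)
{
    /* Optimistic, unlocked snapshot of the pointer. */
    node_t *seen = atomic_load(&obj->current);

    pthread_mutex_lock(&obj->lock);
    /* Re-validate under the lock: another thread may have replaced the
       pointer between the snapshot and acquiring the lock, in which case
       the old node is already detached and only the store is needed. */
    if (seen != NULL && atomic_load(&obj->current) == seen) {
        detach(obj, seen);
    }
    atomic_store(&obj->current, new_node);
    pthread_mutex_unlock(&obj->lock);
}
/* The design point: the unlocked read is only a hint; every decision that
 * matters is re-made while the lock is held. */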
+ assert(cur_dict == dict || + dict->ma_values != _PyObject_InlineValues(obj)); + + if (cur_dict == dict) { + _PyDict_DetachFromObject(dict, obj); } + + Py_END_CRITICAL_SECTION2(); + + Py_DECREF(dict); } else { + PyDictObject *dict; + Py_BEGIN_CRITICAL_SECTION(obj); - Py_XSETREF(_PyObject_ManagedDictPointer(obj)->dict, - (PyDictObject *)Py_XNewRef(new_dict)); + dict = _PyObject_ManagedDictPointer(obj)->dict; + + FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)Py_XNewRef(new_dict)); Py_END_CRITICAL_SECTION(); + + Py_XDECREF(dict); } assert(_PyObject_InlineValuesConsistencyCheck(obj)); } @@ -7085,9 +7101,8 @@ int _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) { _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); - assert(_PyObject_ManagedDictPointer(obj)->dict == mp); - assert(_PyObject_InlineValuesConsistencyCheck(obj)); if (mp->ma_values == NULL || mp->ma_values != _PyObject_InlineValues(obj)) { return 0; } @@ -7096,12 +7111,13 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_INLINE_VALUES); mp->ma_values = copy_values(mp->ma_values); - _PyObject_InlineValues(obj)->valid = 0; if (mp->ma_values == NULL) { return -1; } + FT_ATOMIC_STORE_UINT8(_PyObject_InlineValues(obj)->valid, 0); + assert(_PyObject_InlineValuesConsistencyCheck(obj)); ASSERT_CONSISTENT(mp); return 0; @@ -7117,7 +7133,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) dict = _PyObject_GetManagedDict(obj); if (dict == NULL && (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && - FT_ATOMIC_LOAD_UINT8_RELAXED(_PyObject_InlineValues(obj)->valid)) { + FT_ATOMIC_LOAD_UINT8(_PyObject_InlineValues(obj)->valid)) { dict = _PyObject_MaterializeManagedDict(obj); } else if (dict == NULL) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 3690ba10dd6626..79589b04c6e4a6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6195,11 +6195,21 @@ object_set_class(PyObject *self, PyObject *value, void *closure) * so we must materialize the dictionary first. */ if (oldto->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictObject *dict = _PyObject_MaterializeManagedDict(self); + if (dict == NULL) { + return -1; + } + bool error = false; Py_BEGIN_CRITICAL_SECTION2(self, dict); - if (dict == NULL || _PyDict_DetachFromObject(dict, self) < 0) { + // If we raced after materialization and replaced the dict + // then the materialized dict should no longer have the + // inline values in which case detach is a nop. 
+ assert(_PyObject_GetManagedDict(self) == dict || + dict->ma_values != _PyObject_InlineValues(self)); + + if (_PyDict_DetachFromObject(dict, self) < 0) { error = true; } From 319f632aebeb3fcf46e3e3b49d25c005bc6b4063 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Wed, 17 Apr 2024 16:48:02 -0700 Subject: [PATCH 08/19] Refactor del/set into set_or_del_lock_held, and avoid locking on newly materialized dict Fix duplicate incref Fix comment Remove redundant if check on detach --- Include/internal/pycore_object.h | 2 +- Objects/dictobject.c | 77 +++++++++++++++----------------- 2 files changed, 37 insertions(+), 42 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index cdbdefa26cc200..88b052f4544b15 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -661,7 +661,7 @@ extern PyObject* _PyType_GetTextSignatureFromInternalDoc(const char *, const cha void _PyObject_InitInlineValues(PyObject *obj, PyTypeObject *tp); extern int _PyObject_StoreInstanceAttribute(PyObject *obj, - PyObject *name, PyObject *value); + PyObject *name, PyObject *value); extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index f773abeda4cf44..436e98aa222e59 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -6659,6 +6659,22 @@ _PyObject_MaterializeManagedDict(PyObject *obj) return dict; } +static int +set_or_del_lock_held(PyDictObject *dict, PyObject *name, PyObject *value) +{ + if (value == NULL) { + Py_hash_t hash; + if (!PyUnicode_CheckExact(name) || (hash = unicode_get_hash(name)) == -1) { + hash = PyObject_Hash(name); + if (hash == -1) + return -1; + } + return delitem_knownhash_lock_held((PyObject *)dict, name, hash); + } else { + return setitem_lock_held(dict, name, value); + } +} + // Called with either the object's lock or the dict's lock held // depending on whether or not a dict has been materialized for // the object. @@ -6713,33 +6729,22 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, if (ix == DKIX_EMPTY) { int res; if (dict == NULL) { - dict = materialize_managed_dict_lock_held(obj); - if (dict == NULL) { + // Make the dict but don't publish it in the object + // so that no one else will see it. 
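/*
 * A short, self-contained C11 sketch of the build-first / publish-last
 * pattern the comment above describes: the new structure is fully
 * initialized before its pointer is made visible with a release store,
 * and readers pair that with an acquire load, so no thread can observe a
 * half-built object.  The names here (struct config, g_config,
 * publish_config) are illustrative only, not CPython APIs.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct config { int a; int b; };

static _Atomic(struct config *) g_config;     /* NULL until published */

static int
publish_config(int a, int b)
{
    struct config *c = malloc(sizeof(*c));
    if (c == NULL) {
        return -1;
    }
    c->a = a;                     /* fully initialize first ...        */
    c->b = b;
    /* ... then publish: the release store orders the writes above
       before the pointer becomes visible to other threads. */
    atomic_store_explicit(&g_config, c, memory_order_release);
    return 0;
}

static struct config *
read_config(void)
{
    /* The acquire load pairs with the release store in publish_config(),
       so a non-NULL result is always fully initialized. */
    return atomic_load_explicit(&g_config, memory_order_acquire);
}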
+ dict = make_dict_from_instance_attributes(PyInterpreterState_Get(), keys, values); + if (dict == NULL || + set_or_del_lock_held(dict, name, value) < 0) { return -1; } - if (value == NULL) { - res = PyDict_DelItem((PyObject *)dict, name); - } else { - res = PyDict_SetItem((PyObject *)dict, name, value); - } - - return res; + FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)dict); + return 0; } _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(dict); - if (value == NULL) { - Py_hash_t hash; - if (!PyUnicode_CheckExact(name) || (hash = unicode_get_hash(name)) == -1) { - hash = PyObject_Hash(name); - if (hash == -1) - return -1; - } - res = delitem_knownhash_lock_held((PyObject *)dict, name, hash); - } else { - res = setitem_lock_held(dict, name, value); - } + res = set_or_del_lock_held (dict, name, value); return res; } @@ -6782,11 +6787,7 @@ store_instance_attr_dict(PyObject *obj, PyDictObject *dict, PyObject *name, PyOb res = store_instance_attr_lock_held(obj, values, name, value); } else { - if (value == NULL) { - res = PyDict_DelItem((PyObject *)dict, name); - } else { - res = PyDict_SetItem((PyObject *)dict, name, value); - } + res = set_or_del_lock_held(dict, name, value); } Py_END_CRITICAL_SECTION(); return res; @@ -6870,9 +6871,8 @@ _PyObject_ManagedDictValidityCheck(PyObject *obj) } #endif -// Attempts to get an instance attribute from the inline values. Returns 0 if -// the lookup from the inline values was successful or 1 if the inline values -// are no longer valid. No error is set in either case. +// Attempts to get an instance attribute from the inline values. Returns true +// if successful, or false if the caller needs to lookup in the dictionary. bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr) { @@ -7048,27 +7048,22 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) Py_BEGIN_CRITICAL_SECTION2(dict, obj); - // If the dict in the object has been replaced between when we - // got the dict and unlocked the objects then it's - // definitely no longer inline and there's no need to detach - // it, we can just replace it. +#ifdef Py_DEBUG + // If the dict in the object has been replaced between when we got + // the dict and unlocked the objects then it's definitely no longer + // inline and there's no need to detach it, we can just replace it. + // The call to _PyDict_DetachFromObject will be a nop. PyDictObject *cur_dict = _PyObject_ManagedDictPointer(obj)->dict; assert(cur_dict == dict || (cur_dict->ma_values != _PyObject_InlineValues(obj) && + dict->ma_values != _PyObject_InlineValues(obj) && !_PyObject_InlineValues(obj)->valid)); +#endif - Py_XINCREF(new_dict); FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, (PyDictObject *)Py_XNewRef(new_dict)); - // If we got a replacement dict after locking the object and the dict - // then the old dict had to already have been detached. 
- assert(cur_dict == dict || - dict->ma_values != _PyObject_InlineValues(obj)); - - if (cur_dict == dict) { - _PyDict_DetachFromObject(dict, obj); - } + _PyDict_DetachFromObject(dict, obj); Py_END_CRITICAL_SECTION2(); From 2f6ca5874000bb0c1d1383e651c0be6d840b8000 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 12 Mar 2024 14:08:22 -0700 Subject: [PATCH 09/19] tracing thread safety --- Include/internal/pycore_ceval_state.h | 1 + .../internal/pycore_pyatomic_ft_wrappers.h | 7 + Lib/test/test_free_threading/__init__.py | 7 + .../test_free_threading/test_monitoring.py | 232 ++++++++++++++++++ Makefile.pre.in | 1 + Python/instrumentation.c | 214 +++++++++++----- Python/legacy_tracing.c | 95 ++++--- Python/pystate.c | 1 + 8 files changed, 472 insertions(+), 86 deletions(-) create mode 100644 Lib/test/test_free_threading/__init__.py create mode 100644 Lib/test/test_free_threading/test_monitoring.py diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index b453328f15649e..168295534e036c 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -63,6 +63,7 @@ struct _ceval_runtime_state { } perf; /* Pending calls to be made only on the main thread. */ struct _pending_calls pending_mainthread; + PyMutex sys_trace_profile_mutex; }; #ifdef PY_HAVE_PERF_TRAMPOLINE diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 2514f51f1b0086..3f0c413ad31b5b 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -26,20 +26,27 @@ extern "C" { _Py_atomic_load_ssize_relaxed(&value) #define FT_ATOMIC_STORE_PTR(value, new_value) \ _Py_atomic_store_ptr(&value, new_value) +#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) \ + _Py_atomic_load_ptr_acquire(&value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ _Py_atomic_store_ptr_release(&value, new_value) #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ + _Py_atomic_store_uint8_relaxed(&value, new_value) #else #define FT_ATOMIC_LOAD_PTR(value) value #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value #define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value + #endif #ifdef __cplusplus diff --git a/Lib/test/test_free_threading/__init__.py b/Lib/test/test_free_threading/__init__.py new file mode 100644 index 00000000000000..9a89d27ba9f979 --- /dev/null +++ b/Lib/test/test_free_threading/__init__.py @@ -0,0 +1,7 @@ +import os + +from test import support + + +def load_tests(*args): + return support.load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py new file mode 100644 index 00000000000000..b10a0e39053c1c --- /dev/null +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -0,0 +1,232 @@ +"""Tests monitoring, sys.settrace, and sys.setprofile in a 
multi-threaded +environmenet to verify things are thread-safe in a free-threaded build""" + +import sys +import time +import unittest +import weakref + +from sys import monitoring +from test.support import is_wasi +from threading import Thread +from unittest import TestCase + + +class InstrumentationMultiThreadedMixin: + if not hasattr(sys, "gettotalrefcount"): + thread_count = 50 + func_count = 1000 + fib = 15 + else: + # Run a little faster in debug builds... + thread_count = 25 + func_count = 500 + fib = 15 + + def after_threads(self): + """Runs once after all the threads have started""" + pass + + def during_threads(self): + """Runs repeatedly while the threads are still running""" + pass + + def work(self, n, funcs): + """Fibonacci function which also calls a bunch of random functions""" + for func in funcs: + func() + if n < 2: + return n + return self.work(n - 1, funcs) + self.work(n - 2, funcs) + + def start_work(self, n, funcs): + # With the GIL builds we need to make sure that the hooks have + # a chance to run as it's possible to run w/o releasing the GIL. + time.sleep(1) + self.work(n, funcs) + + def after_test(self): + """Runs once after the test is done""" + pass + + def test_instrumention(self): + # Setup a bunch of functions which will need instrumentation... + funcs = [] + for i in range(self.func_count): + x = {} + exec("def f(): pass", x) + funcs.append(x["f"]) + + threads = [] + for i in range(self.thread_count): + # Each thread gets a copy of the func list to avoid contention + t = Thread(target=self.start_work, args=(self.fib, list(funcs))) + t.start() + threads.append(t) + + self.after_threads() + + while True: + any_alive = False + for t in threads: + if t.is_alive(): + any_alive = True + break + + if not any_alive: + break + + self.during_threads() + + self.after_test() + + +class MonitoringTestMixin: + def setUp(self): + for i in range(6): + if monitoring.get_tool(i) is None: + self.tool_id = i + monitoring.use_tool_id(i, self.__class__.__name__) + break + + def tearDown(self): + monitoring.free_tool_id(self.tool_id) + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class SetPreTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): + """Sets tracing one time after the threads have started""" + + def setUp(self): + super().setUp() + self.called = False + + def after_test(self): + self.assertTrue(self.called) + + def trace_func(self, frame, event, arg): + self.called = True + return self.trace_func + + def after_threads(self): + sys.settrace(self.trace_func) + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class MonitoringMultiThreaded( + MonitoringTestMixin, InstrumentationMultiThreadedMixin, TestCase +): + """Uses sys.monitoring and repeatedly toggles instrumentation on and off""" + + def setUp(self): + super().setUp() + self.set = False + self.called = False + monitoring.register_callback( + self.tool_id, monitoring.events.LINE, self.callback + ) + + def tearDown(self): + monitoring.set_events(self.tool_id, 0) + super().tearDown() + + def callback(self, *args): + self.called = True + + def after_test(self): + self.assertTrue(self.called) + + def during_threads(self): + if self.set: + monitoring.set_events( + self.tool_id, monitoring.events.CALL | monitoring.events.LINE + ) + else: + monitoring.set_events(self.tool_id, 0) + self.set = not self.set + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class SetTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): + """Uses sys.settrace and repeatedly toggles instrumentation on 
and off""" + + def setUp(self): + self.set = False + self.called = False + + def after_test(self): + self.assertTrue(self.called) + + def tearDown(self): + sys.settrace(None) + + def trace_func(self, frame, event, arg): + self.called = True + return self.trace_func + + def during_threads(self): + if self.set: + sys.settrace(self.trace_func) + else: + sys.settrace(None) + self.set = not self.set + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): + """Uses sys.setprofile and repeatedly toggles instrumentation on and off""" + thread_count = 25 + func_count = 200 + fib = 15 + + def setUp(self): + self.set = False + self.called = False + + def after_test(self): + self.assertTrue(self.called) + + def tearDown(self): + sys.setprofile(None) + + def trace_func(self, frame, event, arg): + self.called = True + return self.trace_func + + def during_threads(self): + if self.set: + sys.setprofile(self.trace_func) + else: + sys.setprofile(None) + self.set = not self.set + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class MonitoringMisc(MonitoringTestMixin, TestCase): + def register_callback(self): + def callback(*args): + pass + + for i in range(200): + monitoring.register_callback(self.tool_id, monitoring.events.LINE, callback) + + self.refs.append(weakref.ref(callback)) + + def test_register_callback(self): + self.refs = [] + threads = [] + for i in range(50): + t = Thread(target=self.register_callback) + t.start() + threads.append(t) + + for thread in threads: + thread.join() + + monitoring.register_callback(self.tool_id, monitoring.events.LINE, None) + for ref in self.refs: + self.assertEqual(ref(), None) + + +if __name__ == "__main__": + unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index fd8678cdaf8207..f7c21a380caa99 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2366,6 +2366,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_doctest \ test/test_email \ test/test_email/data \ + test/test_free_threading \ test/test_future_stmt \ test/test_gdb \ test/test_import \ diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 3866144a19bf74..4b743135f6d203 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -6,6 +6,7 @@ #include "pycore_call.h" #include "pycore_ceval.h" // _PY_EVAL_EVENTS_BITS #include "pycore_code.h" // _PyCode_Clear_Executors() +#include "pycore_critical_section.h" #include "pycore_frame.h" #include "pycore_interp.h" #include "pycore_long.h" @@ -13,12 +14,22 @@ #include "pycore_namespace.h" #include "pycore_object.h" #include "pycore_opcode_metadata.h" // IS_VALID_OPCODE, _PyOpcode_Caches +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_UINT8_RELAXED #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() /* Uncomment this to dump debugging output when assertions fail */ // #define INSTRUMENT_DEBUG 1 +#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) +#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) \ + if (!_PyInterpreterState_GET()->stoptheworld.world_stopped) { \ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); \ + } +#else +#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) +#endif + PyObject _PyInstrumentation_DISABLE = _PyObject_HEAD_INIT(&PyBaseObject_Type); PyObject _PyInstrumentation_MISSING = _PyObject_HEAD_INIT(&PyBaseObject_Type); @@ -275,17 +286,39 @@ compute_line(PyCodeObject *code, int offset, int8_t line_delta) return PyCode_Addr2Line(code, offset * sizeof(_Py_CODEUNIT)); } +static inline 
_PyCoMonitoringData * +get_monitoring(PyCodeObject *code) { + return FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring); +} + +static inline _PyCoLineInstrumentationData * +get_lines_data(PyCodeObject *code) +{ + _PyCoMonitoringData *monitoring = get_monitoring(code); + return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->lines); +} + +static inline uint8_t * +get_per_instruction_opcodes(PyCodeObject *code) +{ + _PyCoMonitoringData *monitoring = get_monitoring(code); + return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->per_instruction_opcodes); +} + int _PyInstruction_GetLength(PyCodeObject *code, int offset) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + + _PyCoMonitoringData *monitoring = get_monitoring(code); int opcode = _PyCode_CODE(code)[offset].op.code; assert(opcode != 0); assert(opcode != RESERVED); if (opcode == INSTRUMENTED_LINE) { - opcode = code->_co_monitoring->lines[offset].original_opcode; + opcode = monitoring->lines[offset].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode = code->_co_monitoring->per_instruction_opcodes[offset]; + opcode = monitoring->per_instruction_opcodes[offset]; } int deinstrumented = DE_INSTRUMENT[opcode]; if (deinstrumented) { @@ -424,6 +457,7 @@ dump_instrumentation_data(PyCodeObject *code, int star, FILE*out) } #define CHECK(test) do { \ + ASSERT_WORLD_STOPPED_OR_LOCKED(code); \ if (!(test)) { \ dump_instrumentation_data(code, i, stderr); \ } \ @@ -449,6 +483,8 @@ valid_opcode(int opcode) static void sanity_check_instrumentation(PyCodeObject *code) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + _PyCoMonitoringData *data = code->_co_monitoring; if (data == NULL) { return; @@ -551,10 +587,12 @@ int _Py_GetBaseOpcode(PyCodeObject *code, int i) { int opcode = _PyCode_CODE(code)[i].op.code; if (opcode == INSTRUMENTED_LINE) { - opcode = code->_co_monitoring->lines[i].original_opcode; + _PyCoLineInstrumentationData *lines = get_lines_data(code); + opcode = lines[i].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode = code->_co_monitoring->per_instruction_opcodes[i]; + uint8_t *per_instr_opcodes = get_per_instruction_opcodes(code); + opcode = per_instr_opcodes[i]; } CHECK(opcode != INSTRUMENTED_INSTRUCTION); CHECK(opcode != INSTRUMENTED_LINE); @@ -588,7 +626,7 @@ de_instrument(PyCodeObject *code, int i, int event) return; } CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented); - *opcode_ptr = deinstrumented; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, deinstrumented); if (_PyOpcode_Caches[deinstrumented]) { instr[1].counter = adaptive_counter_warmup(); } @@ -632,7 +670,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i) int original_opcode = code->_co_monitoring->per_instruction_opcodes[i]; CHECK(original_opcode != 0); CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); - *opcode_ptr = original_opcode; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, original_opcode); if (_PyOpcode_Caches[original_opcode]) { instr[1].counter = adaptive_counter_warmup(); } @@ -665,7 +703,7 @@ instrument(PyCodeObject *code, int i) int deopt = _PyOpcode_Deopt[opcode]; int instrumented = INSTRUMENTED_OPCODES[deopt]; assert(instrumented); - *opcode_ptr = instrumented; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, instrumented); if (_PyOpcode_Caches[deopt]) { instr[1].counter = adaptive_counter_warmup(); } @@ -683,7 +721,7 @@ instrument_line(PyCodeObject *code, int i) _PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; lines->original_opcode = _PyOpcode_Deopt[opcode]; CHECK(lines->original_opcode > 0); - *opcode_ptr = 
INSTRUMENTED_LINE; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_LINE); } static void @@ -712,12 +750,13 @@ instrument_per_instruction(PyCodeObject *code, int i) code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode]; } assert(code->_co_monitoring->per_instruction_opcodes[i] > 0); - *opcode_ptr = INSTRUMENTED_INSTRUCTION; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_INSTRUCTION); } static void remove_tools(PyCodeObject * code, int offset, int event, int tools) { + Py_BEGIN_CRITICAL_SECTION(code); assert(event != PY_MONITORING_EVENT_LINE); assert(event != PY_MONITORING_EVENT_INSTRUCTION); assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event)); @@ -737,6 +776,7 @@ remove_tools(PyCodeObject * code, int offset, int event, int tools) de_instrument(code, offset, event); } } + Py_END_CRITICAL_SECTION(); } #ifndef NDEBUG @@ -752,6 +792,7 @@ tools_is_subset_for_event(PyCodeObject * code, int event, int tools) static void remove_line_tools(PyCodeObject * code, int offset, int tools) { + Py_BEGIN_CRITICAL_SECTION(code); assert(code->_co_monitoring); if (code->_co_monitoring->line_tools) { @@ -769,11 +810,13 @@ remove_line_tools(PyCodeObject * code, int offset, int tools) de_instrument_line(code, offset); } } + Py_END_CRITICAL_SECTION(); } static void add_tools(PyCodeObject * code, int offset, int event, int tools) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); assert(event != PY_MONITORING_EVENT_LINE); assert(event != PY_MONITORING_EVENT_INSTRUCTION); assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event)); @@ -794,6 +837,8 @@ add_tools(PyCodeObject * code, int offset, int event, int tools) static void add_line_tools(PyCodeObject * code, int offset, int tools) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(tools_is_subset_for_event(code, PY_MONITORING_EVENT_LINE, tools)); assert(code->_co_monitoring); if (code->_co_monitoring->line_tools) { @@ -810,6 +855,8 @@ add_line_tools(PyCodeObject * code, int offset, int tools) static void add_per_instruction_tools(PyCodeObject * code, int offset, int tools) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(tools_is_subset_for_event(code, PY_MONITORING_EVENT_INSTRUCTION, tools)); assert(code->_co_monitoring); if (code->_co_monitoring->per_instruction_tools) { @@ -826,6 +873,8 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools) static void remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) { + Py_BEGIN_CRITICAL_SECTION(code); + assert(code->_co_monitoring); if (code->_co_monitoring->per_instruction_tools) { uint8_t *toolsptr = &code->_co_monitoring->per_instruction_tools[offset]; @@ -842,6 +891,7 @@ remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) de_instrument_per_instruction(code, offset); } } + Py_END_CRITICAL_SECTION(); } @@ -969,8 +1019,9 @@ get_tools_for_instruction(PyCodeObject *code, PyInterpreterState *interp, int i, if (PY_MONITORING_IS_INSTRUMENTED_EVENT(event)) { CHECK(is_version_up_to_date(code, interp)); CHECK(instrumentation_cross_checks(interp, code)); - if (code->_co_monitoring->tools) { - tools = code->_co_monitoring->tools[i]; + uint8_t *monitoring_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring->tools); + if (monitoring_tools) { + tools = monitoring_tools[i]; } else { tools = code->_co_monitoring->active_monitors.tools[event]; @@ -1149,7 +1200,7 @@ _Py_call_instrumentation_exc2( int _Py_Instrumentation_GetLine(PyCodeObject *code, int index) { - _PyCoMonitoringData *monitoring = code->_co_monitoring; + _PyCoMonitoringData *monitoring 
= get_monitoring(code); assert(monitoring != NULL); assert(monitoring->lines != NULL); assert(index >= code->_co_firsttraceable); @@ -1168,7 +1219,7 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, assert(instrumentation_cross_checks(tstate->interp, code)); int i = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *monitoring = code->_co_monitoring; + _PyCoMonitoringData *monitoring = get_monitoring(code); _PyCoLineInstrumentationData *line_data = &monitoring->lines[i]; if (tstate->tracing) { goto done; @@ -1189,10 +1240,12 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, goto done; } } - uint8_t tools = code->_co_monitoring->line_tools != NULL ? - code->_co_monitoring->line_tools[i] : + + uint8_t *line_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->line_tools); + uint8_t tools = line_tools != NULL ? + line_tools[i] : (interp->monitors.tools[PY_MONITORING_EVENT_LINE] | - code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] + monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] ); /* Special case sys.settrace to avoid boxing the line number, * only to immediately unbox it. */ @@ -1269,15 +1322,17 @@ _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* assert(is_version_up_to_date(code, tstate->interp)); assert(instrumentation_cross_checks(tstate->interp, code)); int offset = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *instrumentation_data = code->_co_monitoring; - assert(instrumentation_data->per_instruction_opcodes); - int next_opcode = instrumentation_data->per_instruction_opcodes[offset]; + _PyCoMonitoringData *instr_data = get_monitoring(code); + uint8_t *per_instr_opcodes = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_opcodes); + assert(per_instr_opcodes); + int next_opcode = per_instr_opcodes[offset]; if (tstate->tracing) { return next_opcode; } PyInterpreterState *interp = tstate->interp; - uint8_t tools = instrumentation_data->per_instruction_tools != NULL ? - instrumentation_data->per_instruction_tools[offset] : + uint8_t *per_instr_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_tools); + uint8_t tools = per_instr_tools != NULL ? 
+ per_instr_tools[offset] : (interp->monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] | code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] ); @@ -1320,15 +1375,23 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj) PyInterpreterState *is = _PyInterpreterState_GET(); assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS); assert(0 <= event_id && event_id < _PY_MONITORING_EVENTS); +#ifdef Py_GIL_DISABLED + PyObject *callback = _Py_atomic_exchange_ptr( + &is->monitoring_callables[tool_id][event_id], + Py_XNewRef(obj) + ); +#else PyObject *callback = is->monitoring_callables[tool_id][event_id]; is->monitoring_callables[tool_id][event_id] = Py_XNewRef(obj); +#endif return callback; } static void -initialize_tools(PyCodeObject *code) +initialize_tools(PyCodeObject *code, uint8_t* tools) { - uint8_t* tools = code->_co_monitoring->tools; + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(tools != NULL); int code_len = (int)Py_SIZE(code); for (int i = 0; i < code_len; i++) { @@ -1382,9 +1445,10 @@ initialize_tools(PyCodeObject *code) #define NO_LINE -128 static void -initialize_lines(PyCodeObject *code) +initialize_lines(PyCodeObject *code, _PyCoLineInstrumentationData *line_data) { - _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(line_data != NULL); int code_len = (int)Py_SIZE(code); PyCodeAddressRange range; @@ -1499,9 +1563,10 @@ initialize_lines(PyCodeObject *code) } static void -initialize_line_tools(PyCodeObject *code, _Py_LocalMonitors *all_events) +initialize_line_tools(PyCodeObject *code, uint8_t *line_tools, _Py_LocalMonitors *all_events) { - uint8_t *line_tools = code->_co_monitoring->line_tools; + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(line_tools != NULL); int code_len = (int)Py_SIZE(code); for (int i = 0; i < code_len; i++) { @@ -1514,18 +1579,19 @@ allocate_instrumentation_data(PyCodeObject *code) { if (code->_co_monitoring == NULL) { - code->_co_monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); - if (code->_co_monitoring == NULL) { + _PyCoMonitoringData *monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); + if (monitoring == NULL) { PyErr_NoMemory(); return -1; } - code->_co_monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; - code->_co_monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; - code->_co_monitoring->tools = NULL; - code->_co_monitoring->lines = NULL; - code->_co_monitoring->line_tools = NULL; - code->_co_monitoring->per_instruction_opcodes = NULL; - code->_co_monitoring->per_instruction_tools = NULL; + monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; + monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; + monitoring->tools = NULL; + monitoring->lines = NULL; + monitoring->line_tools = NULL; + monitoring->per_instruction_opcodes = NULL; + monitoring->per_instruction_tools = NULL; + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring, monitoring); } return 0; } @@ -1533,6 +1599,8 @@ allocate_instrumentation_data(PyCodeObject *code) static int update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + int code_len = (int)Py_SIZE(code); if (allocate_instrumentation_data(code)) { return -1; @@ -1542,61 +1610,70 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) code->_co_monitoring->local_monitors); bool multitools = multiple_tools(&all_events); if (code->_co_monitoring->tools == NULL && multitools) { - code->_co_monitoring->tools = 
PyMem_Malloc(code_len); - if (code->_co_monitoring->tools == NULL) { + uint8_t *tools = PyMem_Malloc(code_len); + if (tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_tools(code); + initialize_tools(code, tools); + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->tools, tools); } if (all_events.tools[PY_MONITORING_EVENT_LINE]) { if (code->_co_monitoring->lines == NULL) { - code->_co_monitoring->lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (code->_co_monitoring->lines == NULL) { + _PyCoLineInstrumentationData *lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (lines == NULL) { PyErr_NoMemory(); return -1; } - initialize_lines(code); + initialize_lines(code, lines); + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->lines, lines); } if (multitools && code->_co_monitoring->line_tools == NULL) { - code->_co_monitoring->line_tools = PyMem_Malloc(code_len); - if (code->_co_monitoring->line_tools == NULL) { + uint8_t *line_tools = PyMem_Malloc(code_len); + if (line_tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_line_tools(code, &all_events); + initialize_line_tools(code, line_tools, &all_events); + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->line_tools, line_tools); } } if (all_events.tools[PY_MONITORING_EVENT_INSTRUCTION]) { if (code->_co_monitoring->per_instruction_opcodes == NULL) { - code->_co_monitoring->per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (code->_co_monitoring->per_instruction_opcodes == NULL) { + uint8_t *per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (per_instruction_opcodes == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ for (int i = 0; i < code_len; i++) { - code->_co_monitoring->per_instruction_opcodes[i] = 0; + per_instruction_opcodes[i] = 0; } + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_opcodes, + per_instruction_opcodes); } if (multitools && code->_co_monitoring->per_instruction_tools == NULL) { - code->_co_monitoring->per_instruction_tools = PyMem_Malloc(code_len); - if (code->_co_monitoring->per_instruction_tools == NULL) { + uint8_t *per_instruction_tools = PyMem_Malloc(code_len); + if (per_instruction_tools == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. 
*/ for (int i = 0; i < code_len; i++) { - code->_co_monitoring->per_instruction_tools[i] = 0; + per_instruction_tools[i] = 0; } + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_tools, + per_instruction_tools); } } return 0; } -int -_Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) +static int +instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + if (is_version_up_to_date(code, interp)) { assert( interp->ceval.instrumentation_version == 0 || @@ -1736,6 +1813,16 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) return 0; } +int +_Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(code); + res = instrument_lock_held(code, interp); + Py_END_CRITICAL_SECTION(); + return res; +} + #define C_RETURN_EVENTS \ ((1 << PY_MONITORING_EVENT_C_RETURN) | \ (1 << PY_MONITORING_EVENT_C_RAISE)) @@ -1746,6 +1833,10 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) static int instrument_all_executing_code_objects(PyInterpreterState *interp) { +#ifdef Py_GIL_DISABLED + assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); +#endif + _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); PyThreadState* ts = PyInterpreterState_ThreadHead(interp); @@ -1754,7 +1845,7 @@ instrument_all_executing_code_objects(PyInterpreterState *interp) { _PyInterpreterFrame *frame = ts->current_frame; while (frame) { if (frame->owner != FRAME_OWNED_BY_CSTACK) { - if (_Py_Instrument(_PyFrame_GetCode(frame), interp)) { + if (instrument_lock_held(_PyFrame_GetCode(frame), interp)) { return -1; } } @@ -1817,19 +1908,24 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) if (check_tool(interp, tool_id)) { return -1; } + _PyEval_StopTheWorld(interp); uint32_t existing_events = get_events(&interp->monitors, tool_id); if (existing_events == events) { + _PyEval_StartTheWorld(interp); return 0; } set_events(&interp->monitors, tool_id, events); uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT; if (new_version == 0) { PyErr_Format(PyExc_OverflowError, "events set too many times"); + _PyEval_StartTheWorld(interp); return -1; } set_global_version(tstate, new_version); _Py_Executors_InvalidateAll(interp, 1); - return instrument_all_executing_code_objects(interp); + int res = instrument_all_executing_code_objects(interp); + _PyEval_StartTheWorld(interp); + return res; } int @@ -2158,15 +2254,21 @@ monitoring_restart_events_impl(PyObject *module) */ PyThreadState *tstate = _PyThreadState_GET(); PyInterpreterState *interp = tstate->interp; + + _PyEval_StopTheWorld(interp); uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT; uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT; if (new_version <= MONITORING_VERSION_INCREMENT) { PyErr_Format(PyExc_OverflowError, "events set too many times"); + _PyEval_StartTheWorld(interp); return NULL; } interp->last_restart_version = restart_version; set_global_version(tstate, new_version); - if (instrument_all_executing_code_objects(interp)) { + int res = instrument_all_executing_code_objects(interp); + _PyEval_StartTheWorld(interp); + + if (res) { return NULL; } Py_RETURN_NONE; diff --git a/Python/legacy_tracing.c b/Python/legacy_tracing.c index ccbb3eb3f7c82a..ce25e725664345 100644 --- a/Python/legacy_tracing.c +++ b/Python/legacy_tracing.c @@ -16,6 +16,13 @@ typedef struct _PyLegacyEventHandler { int event; } _PyLegacyEventHandler; 
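/*
 * A minimal pthread sketch of the swap-under-lock / release-outside-lock
 * shape used by the profile and trace setup below: the shared slot is only
 * written while the mutex is held, but the previous object is dropped only
 * after the mutex is released, because dropping the last reference can run
 * arbitrary code.  The names here (callback, profile_slot, callback_decref)
 * are illustrative only, not CPython APIs.
 */
#include <pthread.h>
#include <stdlib.h>

typedef struct callback { int refcnt; } callback;

static pthread_mutex_t slot_mutex = PTHREAD_MUTEX_INITIALIZER;
static callback *profile_slot;                /* protected by slot_mutex */

static void
callback_decref(callback *cb)
{
    /* In a real interpreter this may run finalizers and re-enter the
       setter, which is exactly why it must not run under slot_mutex. */
    if (--cb->refcnt == 0) {
        free(cb);
    }
}

static void
set_profile_callback(callback *new_cb)
{
    callback *old;

    pthread_mutex_lock(&slot_mutex);
    old = profile_slot;                       /* swap while locked */
    profile_slot = new_cb;
    pthread_mutex_unlock(&slot_mutex);

    if (old != NULL) {
        callback_decref(old);                 /* drop the reference unlocked */
    }
}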
+#ifdef Py_GIL_DISABLED +#define LOCK_SETUP() PyMutex_Lock(&_PyRuntime.ceval.sys_trace_profile_mutex); +#define UNLOCK_SETUP() PyMutex_Unlock(&_PyRuntime.ceval.sys_trace_profile_mutex); +#else +#define LOCK_SETUP() +#define UNLOCK_SETUP() +#endif /* The Py_tracefunc function expects the following arguments: * obj: the trace object (PyObject *) * frame: the current frame (PyFrameObject *) @@ -414,19 +421,10 @@ is_tstate_valid(PyThreadState *tstate) } #endif -int -_PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +static Py_ssize_t +setup_profile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_profileobj) { - assert(is_tstate_valid(tstate)); - /* The caller must hold the GIL */ - assert(PyGILState_Check()); - - /* Call _PySys_Audit() in the context of the current thread state, - even if tstate is not the current thread state. */ - PyThreadState *current_tstate = _PyThreadState_GET(); - if (_PySys_Audit(current_tstate, "sys.setprofile", NULL) < 0) { - return -1; - } + *old_profileobj = NULL; /* Setup PEP 669 monitoring callbacks and events. */ if (!tstate->interp->sys_profile_initialized) { tstate->interp->sys_profile_initialized = true; @@ -469,25 +467,15 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) int delta = (func != NULL) - (tstate->c_profilefunc != NULL); tstate->c_profilefunc = func; - PyObject *old_profileobj = tstate->c_profileobj; + *old_profileobj = tstate->c_profileobj; tstate->c_profileobj = Py_XNewRef(arg); - Py_XDECREF(old_profileobj); tstate->interp->sys_profiling_threads += delta; assert(tstate->interp->sys_profiling_threads >= 0); - - uint32_t events = 0; - if (tstate->interp->sys_profiling_threads) { - events = - (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | - (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | - (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) | - (1 << PY_MONITORING_EVENT_PY_THROW); - } - return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events); + return tstate->interp->sys_profiling_threads; } int -_PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +_PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) { assert(is_tstate_valid(tstate)); /* The caller must hold the GIL */ @@ -496,11 +484,32 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) /* Call _PySys_Audit() in the context of the current thread state, even if tstate is not the current thread state. 
*/ PyThreadState *current_tstate = _PyThreadState_GET(); - if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { + if (_PySys_Audit(current_tstate, "sys.setprofile", NULL) < 0) { return -1; } - assert(tstate->interp->sys_tracing_threads >= 0); + // needs to be decref'd outside of the lock + PyObject *old_profileobj; + LOCK_SETUP(); + int profiling_threads = setup_profile(tstate, func, arg, &old_profileobj); + UNLOCK_SETUP(); + Py_XDECREF(old_profileobj); + + uint32_t events = 0; + if (profiling_threads) { + events = + (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | + (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | + (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) | + (1 << PY_MONITORING_EVENT_PY_THROW); + } + return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events); +} + +static Py_ssize_t +setup_tracing(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_traceobj) +{ + *old_traceobj = NULL; /* Setup PEP 669 monitoring callbacks and events. */ if (!tstate->interp->sys_trace_initialized) { tstate->interp->sys_trace_initialized = true; @@ -553,14 +562,40 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) int delta = (func != NULL) - (tstate->c_tracefunc != NULL); tstate->c_tracefunc = func; - PyObject *old_traceobj = tstate->c_traceobj; + *old_traceobj = tstate->c_traceobj; tstate->c_traceobj = Py_XNewRef(arg); - Py_XDECREF(old_traceobj); tstate->interp->sys_tracing_threads += delta; assert(tstate->interp->sys_tracing_threads >= 0); + return tstate->interp->sys_tracing_threads; +} + +int +_PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +{ + assert(is_tstate_valid(tstate)); + /* The caller must hold the GIL */ + assert(PyGILState_Check()); + + /* Call _PySys_Audit() in the context of the current thread state, + even if tstate is not the current thread state. 
*/ + PyThreadState *current_tstate = _PyThreadState_GET(); + if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { + return -1; + } + + assert(tstate->interp->sys_tracing_threads >= 0); + // needs to be decref'd outside of the lock + PyObject *old_traceobj; + LOCK_SETUP(); + int tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj); + UNLOCK_SETUP(); + Py_XDECREF(old_traceobj); + if (tracing_threads < 0) { + return -1; + } uint32_t events = 0; - if (tstate->interp->sys_tracing_threads) { + if (tracing_threads) { events = (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | diff --git a/Python/pystate.c b/Python/pystate.c index 37480df88aeb72..06806bd75fbcb2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -399,6 +399,7 @@ _Py_COMP_DIAG_POP &(runtime)->unicode_state.ids.mutex, \ &(runtime)->imports.extensions.mutex, \ &(runtime)->ceval.pending_mainthread.mutex, \ + &(runtime)->ceval.sys_trace_profile_mutex, \ &(runtime)->atexit.mutex, \ &(runtime)->audit_hooks.mutex, \ &(runtime)->allocators.mutex, \ From fe19d59273bb3b0e2db553e8fd98d6788afdaebc Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 25 Mar 2024 13:03:27 -0700 Subject: [PATCH 10/19] Lock on allocation, use version as sync point --- Include/cpython/pyatomic.h | 8 + Include/cpython/pyatomic_gcc.h | 8 + Include/cpython/pyatomic_msc.h | 25 ++ Include/cpython/pyatomic_std.h | 16 ++ Include/internal/pycore_gc.h | 2 +- .../internal/pycore_pyatomic_ft_wrappers.h | 6 + .../test_free_threading/test_monitoring.py | 2 +- Python/bytecodes.c | 6 +- Python/ceval.c | 1 + Python/generated_cases.c.h | 5 +- Python/instrumentation.c | 220 +++++++++--------- Python/legacy_tracing.c | 3 +- 12 files changed, 183 insertions(+), 119 deletions(-) diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index c3e132d3877ca5..69083f1d9dd0c2 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -465,10 +465,16 @@ _Py_atomic_store_ullong_relaxed(unsigned long long *obj, static inline void * _Py_atomic_load_ptr_acquire(const void *obj); +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj); + // Stores `*obj = value` (release operation) static inline void _Py_atomic_store_ptr_release(void *obj, void *value); +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value); + static inline void _Py_atomic_store_ssize_release(Py_ssize_t *obj, Py_ssize_t value); @@ -491,6 +497,8 @@ static inline Py_ssize_t _Py_atomic_load_ssize_acquire(const Py_ssize_t *obj); + + // --- _Py_atomic_fence ------------------------------------------------------ // Sequential consistency fence. C11 fences have complex semantics. 
When diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h index 0b40f81bd8736d..af78a94c736545 100644 --- a/Include/cpython/pyatomic_gcc.h +++ b/Include/cpython/pyatomic_gcc.h @@ -492,10 +492,18 @@ static inline void * _Py_atomic_load_ptr_acquire(const void *obj) { return (void *)__atomic_load_n((void **)obj, __ATOMIC_ACQUIRE); } +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj) +{ return (uintptr_t)__atomic_load_n((uintptr_t *)obj, __ATOMIC_ACQUIRE); } + static inline void _Py_atomic_store_ptr_release(void *obj, void *value) { __atomic_store_n((void **)obj, value, __ATOMIC_RELEASE); } +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELEASE); } + static inline void _Py_atomic_store_int_release(int *obj, int value) { __atomic_store_n(obj, value, __ATOMIC_RELEASE); } diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index 3205e253b28546..c8021714f48790 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -914,6 +914,18 @@ _Py_atomic_load_ptr_acquire(const void *obj) #endif } +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(uintptr_t volatile *)obj; +#elif defined(_M_ARM64) + return (uintptr_t)__ldar64((unsigned __int64 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_ptr_acquire" +#endif +} + static inline void _Py_atomic_store_ptr_release(void *obj, void *value) { @@ -926,6 +938,19 @@ _Py_atomic_store_ptr_release(void *obj, void *value) #endif } +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) +{ +#if defined(_M_X64) || defined(_M_IX86) + *(uintptr_t volatile *)obj = value; +#elif defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(unsigned __int64); + __stlr64((unsigned __int64 volatile *)obj, (unsigned __int64)value); +#else +# error "no implementation of _Py_atomic_store_int_release" +#endif +} + static inline void _Py_atomic_store_int_release(int *obj, int value) { diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index ef34bb0b77dfe5..6a77eae536d8dd 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -863,6 +863,14 @@ _Py_atomic_load_ptr_acquire(const void *obj) memory_order_acquire); } +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uintptr_t)*)obj, + memory_order_acquire); +} + static inline void _Py_atomic_store_ptr_release(void *obj, void *value) { @@ -871,6 +879,14 @@ _Py_atomic_store_ptr_release(void *obj, void *value) memory_order_release); } +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uintptr_t)*)obj, value, + memory_order_release); +} + static inline void _Py_atomic_store_int_release(int *obj, int value) { diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 60020b5c01f8a6..9e465fdd86279f 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -93,7 +93,7 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) { * threads and needs special purpose when freeing due to * the possibility of in-flight lock-free reads occurring. * Objects with this bit that are GC objects will automatically - * delay-freed by PyObject_GC_Del. 
*/ + * delay-freed by PyObject_GC_Del. */ static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) { return (op->ob_gc_bits & _PyGC_BITS_SHARED_INLINE) != 0; } diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 3f0c413ad31b5b..7b187209d68f98 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -28,10 +28,14 @@ extern "C" { _Py_atomic_store_ptr(&value, new_value) #define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) \ _Py_atomic_load_ptr_acquire(&value) +#define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) \ + _Py_atomic_load_uintptr_acquire(&value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ _Py_atomic_store_ptr_release(&value, new_value) +#define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) \ + _Py_atomic_store_uintptr_release(&value, new_value) #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ @@ -42,8 +46,10 @@ extern "C" { #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value #define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value #define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) value +#define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py index b10a0e39053c1c..e170840ca3cb27 100644 --- a/Lib/test/test_free_threading/test_monitoring.py +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -49,7 +49,7 @@ def after_test(self): """Runs once after the test is done""" pass - def test_instrumention(self): + def test_instrumentation(self): # Setup a bunch of functions which will need instrumentation... 
funcs = [] for i in range(self.func_count): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c34d702f06418e..915173474701fe 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -20,6 +20,7 @@ #include "pycore_object.h" // _PyObject_GC_TRACK() #include "pycore_opcode_metadata.h" // uop names #include "pycore_opcode_utils.h" // MAKE_FUNCTION_* +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_ACQUIRE #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_range.h" // _PyRangeIterObject @@ -150,10 +151,11 @@ dummy_func( uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; - uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + PyCodeObject *code = _PyFrame_GetCode(frame); + uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version); assert((code_version & 255) == 0); if (code_version != global_version) { - int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); + int err = _Py_Instrument(code, tstate->interp); ERROR_IF(err, error); next_instr = this_instr; } diff --git a/Python/ceval.c b/Python/ceval.c index b88e555ded5c2e..2f217c5f33c6ce 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -20,6 +20,7 @@ #include "pycore_opcode_metadata.h" // EXTRA_CASES #include "pycore_optimizer.h" // _PyUOpExecutor_Type #include "pycore_opcode_utils.h" // MAKE_FUNCTION_* +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_ACQUIRE #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_range.h" // _PyRangeIterObject diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a7764b0ec12e10..9da979b2e4cd26 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4927,10 +4927,11 @@ uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; - uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + PyCodeObject *code = _PyFrame_GetCode(frame); + uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version); assert((code_version & 255) == 0); if (code_version != global_version) { - int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); + int err = _Py_Instrument(code, tstate->interp); if (err) goto error; next_instr = this_instr; } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 4b743135f6d203..8e21add2a23492 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -14,7 +14,7 @@ #include "pycore_namespace.h" #include "pycore_object.h" #include "pycore_opcode_metadata.h" // IS_VALID_OPCODE, _PyOpcode_Caches -#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_UINT8_RELAXED +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_UINTPTR_RELEASE #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() @@ -22,12 +22,26 @@ // #define INSTRUMENT_DEBUG 1 #if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) + #define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) \ if (!_PyInterpreterState_GET()->stoptheworld.world_stopped) { \ _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); \ } +#define ASSERT_WORLD_STOPPED() assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); + +#define LOCK_CODE(code) \ + assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped); \ + 
Py_BEGIN_CRITICAL_SECTION(code) + +#define UNLOCK_CODE(code) Py_END_CRITICAL_SECTION() + #else + #define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) +#define ASSERT_WORLD_STOPPED() +#define LOCK_CODE(code) +#define UNLOCK_CODE() + #endif PyObject _PyInstrumentation_DISABLE = _PyObject_HEAD_INIT(&PyBaseObject_Type); @@ -286,39 +300,19 @@ compute_line(PyCodeObject *code, int offset, int8_t line_delta) return PyCode_Addr2Line(code, offset * sizeof(_Py_CODEUNIT)); } -static inline _PyCoMonitoringData * -get_monitoring(PyCodeObject *code) { - return FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring); -} - -static inline _PyCoLineInstrumentationData * -get_lines_data(PyCodeObject *code) -{ - _PyCoMonitoringData *monitoring = get_monitoring(code); - return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->lines); -} - -static inline uint8_t * -get_per_instruction_opcodes(PyCodeObject *code) -{ - _PyCoMonitoringData *monitoring = get_monitoring(code); - return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->per_instruction_opcodes); -} - int _PyInstruction_GetLength(PyCodeObject *code, int offset) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); - _PyCoMonitoringData *monitoring = get_monitoring(code); int opcode = _PyCode_CODE(code)[offset].op.code; assert(opcode != 0); assert(opcode != RESERVED); if (opcode == INSTRUMENTED_LINE) { - opcode = monitoring->lines[offset].original_opcode; + opcode = code->_co_monitoring->lines[offset].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode = monitoring->per_instruction_opcodes[offset]; + opcode = code->_co_monitoring->per_instruction_opcodes[offset]; } int deinstrumented = DE_INSTRUMENT[opcode]; if (deinstrumented) { @@ -587,12 +581,10 @@ int _Py_GetBaseOpcode(PyCodeObject *code, int i) { int opcode = _PyCode_CODE(code)[i].op.code; if (opcode == INSTRUMENTED_LINE) { - _PyCoLineInstrumentationData *lines = get_lines_data(code); - opcode = lines[i].original_opcode; + opcode = code->_co_monitoring->lines[i].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - uint8_t *per_instr_opcodes = get_per_instruction_opcodes(code); - opcode = per_instr_opcodes[i]; + opcode = code->_co_monitoring->per_instruction_opcodes[i]; } CHECK(opcode != INSTRUMENTED_INSTRUCTION); CHECK(opcode != INSTRUMENTED_LINE); @@ -626,7 +618,7 @@ de_instrument(PyCodeObject *code, int i, int event) return; } CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented); - FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, deinstrumented); + *opcode_ptr = deinstrumented; if (_PyOpcode_Caches[deinstrumented]) { instr[1].counter = adaptive_counter_warmup(); } @@ -670,7 +662,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i) int original_opcode = code->_co_monitoring->per_instruction_opcodes[i]; CHECK(original_opcode != 0); CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); - FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, original_opcode); + *opcode_ptr = original_opcode; if (_PyOpcode_Caches[original_opcode]) { instr[1].counter = adaptive_counter_warmup(); } @@ -703,7 +695,7 @@ instrument(PyCodeObject *code, int i) int deopt = _PyOpcode_Deopt[opcode]; int instrumented = INSTRUMENTED_OPCODES[deopt]; assert(instrumented); - FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, instrumented); + *opcode_ptr = instrumented; if (_PyOpcode_Caches[deopt]) { instr[1].counter = adaptive_counter_warmup(); } @@ -721,7 +713,7 @@ instrument_line(PyCodeObject *code, int i) _PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; lines->original_opcode = _PyOpcode_Deopt[opcode]; CHECK(lines->original_opcode > 0); - 
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_LINE); + *opcode_ptr = INSTRUMENTED_LINE; } static void @@ -750,13 +742,13 @@ instrument_per_instruction(PyCodeObject *code, int i) code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode]; } assert(code->_co_monitoring->per_instruction_opcodes[i] > 0); - FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_INSTRUCTION); + *opcode_ptr = INSTRUMENTED_INSTRUCTION; } static void remove_tools(PyCodeObject * code, int offset, int event, int tools) { - Py_BEGIN_CRITICAL_SECTION(code); + ASSERT_WORLD_STOPPED_OR_LOCKED(code); assert(event != PY_MONITORING_EVENT_LINE); assert(event != PY_MONITORING_EVENT_INSTRUCTION); assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event)); @@ -776,7 +768,6 @@ remove_tools(PyCodeObject * code, int offset, int event, int tools) de_instrument(code, offset, event); } } - Py_END_CRITICAL_SECTION(); } #ifndef NDEBUG @@ -792,7 +783,8 @@ tools_is_subset_for_event(PyCodeObject * code, int event, int tools) static void remove_line_tools(PyCodeObject * code, int offset, int tools) { - Py_BEGIN_CRITICAL_SECTION(code); + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(code->_co_monitoring); if (code->_co_monitoring->line_tools) { @@ -810,7 +802,6 @@ remove_line_tools(PyCodeObject * code, int offset, int tools) de_instrument_line(code, offset); } } - Py_END_CRITICAL_SECTION(); } static void @@ -873,7 +864,7 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools) static void remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) { - Py_BEGIN_CRITICAL_SECTION(code); + ASSERT_WORLD_STOPPED_OR_LOCKED(code); assert(code->_co_monitoring); if (code->_co_monitoring->per_instruction_tools) { @@ -891,7 +882,6 @@ remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) de_instrument_per_instruction(code, offset); } } - Py_END_CRITICAL_SECTION(); } @@ -1019,9 +1009,8 @@ get_tools_for_instruction(PyCodeObject *code, PyInterpreterState *interp, int i, if (PY_MONITORING_IS_INSTRUMENTED_EVENT(event)) { CHECK(is_version_up_to_date(code, interp)); CHECK(instrumentation_cross_checks(interp, code)); - uint8_t *monitoring_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring->tools); - if (monitoring_tools) { - tools = monitoring_tools[i]; + if (code->_co_monitoring->tools) { + tools = code->_co_monitoring->tools[i]; } else { tools = code->_co_monitoring->active_monitors.tools[event]; @@ -1107,7 +1096,9 @@ call_instrumentation_vector( break; } else { + LOCK_CODE(code); remove_tools(code, offset, event, 1 << tool); + UNLOCK_CODE(); } } } @@ -1200,7 +1191,7 @@ _Py_call_instrumentation_exc2( int _Py_Instrumentation_GetLine(PyCodeObject *code, int index) { - _PyCoMonitoringData *monitoring = get_monitoring(code); + _PyCoMonitoringData *monitoring = code->_co_monitoring; assert(monitoring != NULL); assert(monitoring->lines != NULL); assert(index >= code->_co_firsttraceable); @@ -1219,7 +1210,7 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, assert(instrumentation_cross_checks(tstate->interp, code)); int i = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *monitoring = get_monitoring(code); + _PyCoMonitoringData *monitoring = code->_co_monitoring; _PyCoLineInstrumentationData *line_data = &monitoring->lines[i]; if (tstate->tracing) { goto done; @@ -1241,11 +1232,10 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, } } - uint8_t *line_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->line_tools); - uint8_t 
tools = line_tools != NULL ? - line_tools[i] : + uint8_t tools = code->_co_monitoring->line_tools != NULL ? + code->_co_monitoring->line_tools[i] : (interp->monitors.tools[PY_MONITORING_EVENT_LINE] | - monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] + code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] ); /* Special case sys.settrace to avoid boxing the line number, * only to immediately unbox it. */ @@ -1302,7 +1292,9 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, } else { /* DISABLE */ + LOCK_CODE(code); remove_line_tools(code, i, 1 << tool); + UNLOCK_CODE(); } } while (tools); Py_DECREF(line_obj); @@ -1322,17 +1314,15 @@ _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* assert(is_version_up_to_date(code, tstate->interp)); assert(instrumentation_cross_checks(tstate->interp, code)); int offset = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *instr_data = get_monitoring(code); - uint8_t *per_instr_opcodes = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_opcodes); - assert(per_instr_opcodes); - int next_opcode = per_instr_opcodes[offset]; + _PyCoMonitoringData *instrumentation_data = code->_co_monitoring; + assert(instrumentation_data->per_instruction_opcodes); + int next_opcode = instrumentation_data->per_instruction_opcodes[offset]; if (tstate->tracing) { return next_opcode; } PyInterpreterState *interp = tstate->interp; - uint8_t *per_instr_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_tools); - uint8_t tools = per_instr_tools != NULL ? - per_instr_tools[offset] : + uint8_t tools = instrumentation_data->per_instruction_tools != NULL ? + instrumentation_data->per_instruction_tools[offset] : (interp->monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] | code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] ); @@ -1360,7 +1350,9 @@ _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* } else { /* DISABLE */ + LOCK_CODE(code); remove_per_instruction_tools(code, offset, 1 << tool); + UNLOCK_CODE(); } } Py_DECREF(offset_obj); @@ -1388,9 +1380,10 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj) } static void -initialize_tools(PyCodeObject *code, uint8_t* tools) +initialize_tools(PyCodeObject *code) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); + uint8_t* tools = code->_co_monitoring->tools; assert(tools != NULL); int code_len = (int)Py_SIZE(code); @@ -1445,9 +1438,10 @@ initialize_tools(PyCodeObject *code, uint8_t* tools) #define NO_LINE -128 static void -initialize_lines(PyCodeObject *code, _PyCoLineInstrumentationData *line_data) +initialize_lines(PyCodeObject *code) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); + _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; assert(line_data != NULL); int code_len = (int)Py_SIZE(code); @@ -1563,9 +1557,10 @@ initialize_lines(PyCodeObject *code, _PyCoLineInstrumentationData *line_data) } static void -initialize_line_tools(PyCodeObject *code, uint8_t *line_tools, _Py_LocalMonitors *all_events) +initialize_line_tools(PyCodeObject *code, _Py_LocalMonitors *all_events) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); + uint8_t *line_tools = code->_co_monitoring->line_tools; assert(line_tools != NULL); int code_len = (int)Py_SIZE(code); @@ -1577,21 +1572,21 @@ initialize_line_tools(PyCodeObject *code, uint8_t *line_tools, _Py_LocalMonitors static int allocate_instrumentation_data(PyCodeObject *code) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); if 
(code->_co_monitoring == NULL) { - _PyCoMonitoringData *monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); - if (monitoring == NULL) { + code->_co_monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); + if (code->_co_monitoring == NULL) { PyErr_NoMemory(); return -1; } - monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; - monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; - monitoring->tools = NULL; - monitoring->lines = NULL; - monitoring->line_tools = NULL; - monitoring->per_instruction_opcodes = NULL; - monitoring->per_instruction_tools = NULL; - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring, monitoring); + code->_co_monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; + code->_co_monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; + code->_co_monitoring->tools = NULL; + code->_co_monitoring->lines = NULL; + code->_co_monitoring->line_tools = NULL; + code->_co_monitoring->per_instruction_opcodes = NULL; + code->_co_monitoring->per_instruction_tools = NULL; } return 0; } @@ -1610,60 +1605,53 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) code->_co_monitoring->local_monitors); bool multitools = multiple_tools(&all_events); if (code->_co_monitoring->tools == NULL && multitools) { - uint8_t *tools = PyMem_Malloc(code_len); - if (tools == NULL) { + code->_co_monitoring->tools = PyMem_Malloc(code_len); + if (code->_co_monitoring->tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_tools(code, tools); - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->tools, tools); + initialize_tools(code); } if (all_events.tools[PY_MONITORING_EVENT_LINE]) { if (code->_co_monitoring->lines == NULL) { - _PyCoLineInstrumentationData *lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (lines == NULL) { + code->_co_monitoring->lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (code->_co_monitoring->lines == NULL) { PyErr_NoMemory(); return -1; } - initialize_lines(code, lines); - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->lines, lines); + initialize_lines(code); } if (multitools && code->_co_monitoring->line_tools == NULL) { - uint8_t *line_tools = PyMem_Malloc(code_len); - if (line_tools == NULL) { + code->_co_monitoring->line_tools = PyMem_Malloc(code_len); + if (code->_co_monitoring->line_tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_line_tools(code, line_tools, &all_events); - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->line_tools, line_tools); + initialize_line_tools(code, &all_events); } } if (all_events.tools[PY_MONITORING_EVENT_INSTRUCTION]) { if (code->_co_monitoring->per_instruction_opcodes == NULL) { - uint8_t *per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (per_instruction_opcodes == NULL) { + code->_co_monitoring->per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (code->_co_monitoring->per_instruction_opcodes == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. 
*/ for (int i = 0; i < code_len; i++) { - per_instruction_opcodes[i] = 0; + code->_co_monitoring->per_instruction_opcodes[i] = 0; } - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_opcodes, - per_instruction_opcodes); } if (multitools && code->_co_monitoring->per_instruction_tools == NULL) { - uint8_t *per_instruction_tools = PyMem_Malloc(code_len); - if (per_instruction_tools == NULL) { + code->_co_monitoring->per_instruction_tools = PyMem_Malloc(code_len); + if (code->_co_monitoring->per_instruction_tools == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ for (int i = 0; i < code_len; i++) { - per_instruction_tools[i] = 0; + code->_co_monitoring->per_instruction_tools[i] = 0; } - FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_tools, - per_instruction_tools); } } return 0; @@ -1713,12 +1701,8 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) assert(monitors_are_empty(monitors_and(new_events, removed_events))); } code->_co_monitoring->active_monitors = active_events; - code->_co_instrumentation_version = global_version(interp); if (monitors_are_empty(new_events) && monitors_are_empty(removed_events)) { -#ifdef INSTRUMENT_DEBUG - sanity_check_instrumentation(code); -#endif - return 0; + goto done; } /* Insert instrumentation */ for (int i = code->_co_firsttraceable; i < code_len; i+= _PyInstruction_GetLength(code, i)) { @@ -1807,6 +1791,10 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) i += _PyInstruction_GetLength(code, i); } } +done: + FT_ATOMIC_STORE_UINTPTR_RELEASE(code->_co_instrumentation_version, + global_version(interp)); + #ifdef INSTRUMENT_DEBUG sanity_check_instrumentation(code); #endif @@ -1817,9 +1805,9 @@ int _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) { int res; - Py_BEGIN_CRITICAL_SECTION(code); + LOCK_CODE(code); res = instrument_lock_held(code, interp); - Py_END_CRITICAL_SECTION(); + UNLOCK_CODE(); return res; } @@ -1833,9 +1821,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) static int instrument_all_executing_code_objects(PyInterpreterState *interp) { -#ifdef Py_GIL_DISABLED - assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); -#endif + ASSERT_WORLD_STOPPED(); _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1908,22 +1894,25 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) if (check_tool(interp, tool_id)) { return -1; } + + int res; _PyEval_StopTheWorld(interp); uint32_t existing_events = get_events(&interp->monitors, tool_id); if (existing_events == events) { - _PyEval_StartTheWorld(interp); - return 0; + res = 0; + goto done; } set_events(&interp->monitors, tool_id, events); uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT; if (new_version == 0) { PyErr_Format(PyExc_OverflowError, "events set too many times"); - _PyEval_StartTheWorld(interp); - return -1; + res = -1; + goto done; } set_global_version(tstate, new_version); _Py_Executors_InvalidateAll(interp, 1); - int res = instrument_all_executing_code_objects(interp); + res = instrument_all_executing_code_objects(interp); +done: _PyEval_StartTheWorld(interp); return res; } @@ -1941,24 +1930,33 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent if (check_tool(interp, tool_id)) { return -1; } + + int res; + LOCK_CODE(code); if (allocate_instrumentation_data(code)) { - return -1; + res = -1; + goto 
done; } + _Py_LocalMonitors *local = &code->_co_monitoring->local_monitors; uint32_t existing_events = get_local_events(local, tool_id); if (existing_events == events) { - return 0; + res = 0; + goto done; } set_local_events(local, tool_id, events); if (is_version_up_to_date(code, interp)) { /* Force instrumentation update */ code->_co_instrumentation_version -= MONITORING_VERSION_INCREMENT; } + _Py_Executors_InvalidateDependency(interp, code, 1); - if (_Py_Instrument(code, interp)) { - return -1; - } - return 0; + + res = instrument_lock_held(code, interp); + +done: + UNLOCK_CODE(); + return res; } int @@ -2259,8 +2257,8 @@ monitoring_restart_events_impl(PyObject *module) uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT; uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT; if (new_version <= MONITORING_VERSION_INCREMENT) { - PyErr_Format(PyExc_OverflowError, "events set too many times"); _PyEval_StartTheWorld(interp); + PyErr_Format(PyExc_OverflowError, "events set too many times"); return NULL; } interp->last_restart_version = restart_version; diff --git a/Python/legacy_tracing.c b/Python/legacy_tracing.c index ce25e725664345..fcaa9b226ce40c 100644 --- a/Python/legacy_tracing.c +++ b/Python/legacy_tracing.c @@ -491,7 +491,7 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) // needs to be decref'd outside of the lock PyObject *old_profileobj; LOCK_SETUP(); - int profiling_threads = setup_profile(tstate, func, arg, &old_profileobj); + Py_ssize_t profiling_threads = setup_profile(tstate, func, arg, &old_profileobj); UNLOCK_SETUP(); Py_XDECREF(old_profileobj); @@ -582,7 +582,6 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { return -1; } - assert(tstate->interp->sys_tracing_threads >= 0); // needs to be decref'd outside of the lock PyObject *old_traceobj; From 210b8023c0b489961bf8f12751fc8d44cd982b21 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 28 Mar 2024 15:29:55 -0700 Subject: [PATCH 11/19] Lock in non-debug builds --- Python/instrumentation.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 8e21add2a23492..3543537ca34cb9 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -29,6 +29,15 @@ } #define ASSERT_WORLD_STOPPED() assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); +#else + +#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) +#define ASSERT_WORLD_STOPPED() + +#endif + +#ifdef Py_GIL_DISABLED + #define LOCK_CODE(code) \ assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped); \ Py_BEGIN_CRITICAL_SECTION(code) @@ -37,8 +46,6 @@ #else -#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) -#define ASSERT_WORLD_STOPPED() #define LOCK_CODE(code) #define UNLOCK_CODE() From 597f40d5b493fd5dba040912fecd48a3a4917987 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Fri, 29 Mar 2024 12:56:11 -0700 Subject: [PATCH 12/19] Move CHECK_EVAL_BREAKER in ENTER_EXECUTOR --- Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 915173474701fe..c16e11e7fc2f1e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -173,7 +173,7 @@ dummy_func( _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif uintptr_t eval_breaker = 
_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); - uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version); } @@ -2379,7 +2379,6 @@ dummy_func( }; tier1 inst(ENTER_EXECUTOR, (--)) { - CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -2388,6 +2387,7 @@ dummy_func( assert(tstate->previous_executor == NULL); tstate->previous_executor = Py_None; Py_INCREF(executor); + CHECK_EVAL_BREAKER(); GOTO_TIER_TWO(executor); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fccff24a418586..df87f9178f17cf 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -21,7 +21,7 @@ _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); - uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); assert((version & _PY_EVAL_EVENTS_MASK) == 0); if (eval_breaker != version) { UOP_STAT_INC(uopcode, miss); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 9da979b2e4cd26..efe73a27fa7b2c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2501,7 +2501,6 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(ENTER_EXECUTOR); - CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -2510,6 +2509,7 @@ assert(tstate->previous_executor == NULL); tstate->previous_executor = Py_None; Py_INCREF(executor); + CHECK_EVAL_BREAKER(); GOTO_TIER_TWO(executor); DISPATCH(); } @@ -4954,7 +4954,7 @@ _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); - uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version, RESUME); DISPATCH(); From 8393bfe0eb365d1fcb04f5032d88e28e493bf1e6 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 2 Apr 2024 11:48:51 -0700 Subject: [PATCH 13/19] Don't potentially leak executor --- Include/cpython/pyatomic_msc.h | 4 ++-- Include/internal/pycore_pyatomic_ft_wrappers.h | 13 +++++++++++++ Python/bytecodes.c | 9 ++++++++- Python/generated_cases.c.h | 11 +++++++++-- Python/instrumentation.c | 11 +++-------- Python/legacy_tracing.c | 2 +- Tools/jit/template.c | 1 + 7 files changed, 37 insertions(+), 14 deletions(-) diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index c8021714f48790..212cd7817d01c5 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -922,7 +922,7 @@ _Py_atomic_load_uintptr_acquire(const uintptr_t *obj) #elif defined(_M_ARM64) return (uintptr_t)__ldar64((unsigned __int64 volatile *)obj); #else -# error "no implementation of _Py_atomic_load_ptr_acquire" +# error "no implementation of _Py_atomic_load_uintptr_acquire" #endif 
} @@ -947,7 +947,7 @@ _Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) _Py_atomic_ASSERT_ARG_TYPE(unsigned __int64); __stlr64((unsigned __int64 volatile *)obj, (unsigned __int64)value); #else -# error "no implementation of _Py_atomic_store_int_release" +# error "no implementation of _Py_atomic_store_uintptr_release" #endif } diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 7b187209d68f98..a16855729bf0fc 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -40,6 +40,9 @@ extern "C" { _Py_atomic_store_ssize_relaxed(&value, new_value) #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ _Py_atomic_store_uint8_relaxed(&value, new_value) +#define FT_ATOMIC_EXCHANGE_PYOBJECT(value, new_value) \ + _Py_atomic_exchange_ptr(&value, new_value) + #else #define FT_ATOMIC_LOAD_PTR(value) value #define FT_ATOMIC_LOAD_SSIZE(value) value @@ -52,6 +55,16 @@ extern "C" { #define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_EXCHANGE_PYOBJECT(value, new_value) \ + _atomic_exchange_pyobject_withgil(&value, new_value) + +static inline PyObject * +_atomic_exchange_pyobject_withgil(PyObject **src, PyObject *new_value) +{ + PyObject *res = *src; + *src = new_value; + return res; +} #endif diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c16e11e7fc2f1e..a64098dd8dacb4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2379,6 +2379,14 @@ dummy_func( }; tier1 inst(ENTER_EXECUTOR, (--)) { + int prevoparg = oparg; + CHECK_EVAL_BREAKER(); + if (this_instr->op.code != ENTER_EXECUTOR || + this_instr->op.arg != prevoparg) { + next_instr = this_instr; + DISPATCH(); + } + PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -2387,7 +2395,6 @@ dummy_func( assert(tstate->previous_executor == NULL); tstate->previous_executor = Py_None; Py_INCREF(executor); - CHECK_EVAL_BREAKER(); GOTO_TIER_TWO(executor); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index efe73a27fa7b2c..7d3c6ea673fd11 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2498,9 +2498,17 @@ } TARGET(ENTER_EXECUTOR) { - frame->instr_ptr = next_instr; + _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 1; INSTRUCTION_STATS(ENTER_EXECUTOR); + int prevoparg = oparg; + CHECK_EVAL_BREAKER(); + if (this_instr->op.code != ENTER_EXECUTOR || + this_instr->op.arg != prevoparg) { + next_instr = this_instr; + DISPATCH(); + } PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -2509,7 +2517,6 @@ assert(tstate->previous_executor == NULL); tstate->previous_executor = Py_None; Py_INCREF(executor); - CHECK_EVAL_BREAKER(); GOTO_TIER_TWO(executor); DISPATCH(); } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 3543537ca34cb9..f52921ace7bbeb 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -42,7 +42,7 @@ assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped); \ Py_BEGIN_CRITICAL_SECTION(code) -#define UNLOCK_CODE(code) 
Py_END_CRITICAL_SECTION() +#define UNLOCK_CODE() Py_END_CRITICAL_SECTION() #else @@ -1374,15 +1374,10 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj) PyInterpreterState *is = _PyInterpreterState_GET(); assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS); assert(0 <= event_id && event_id < _PY_MONITORING_EVENTS); -#ifdef Py_GIL_DISABLED - PyObject *callback = _Py_atomic_exchange_ptr( - &is->monitoring_callables[tool_id][event_id], + PyObject *callback = FT_ATOMIC_EXCHANGE_PYOBJECT(is->monitoring_callables[tool_id][event_id], Py_XNewRef(obj) ); -#else - PyObject *callback = is->monitoring_callables[tool_id][event_id]; - is->monitoring_callables[tool_id][event_id] = Py_XNewRef(obj); -#endif + return callback; } diff --git a/Python/legacy_tracing.c b/Python/legacy_tracing.c index fcaa9b226ce40c..d7aae7d2343ac2 100644 --- a/Python/legacy_tracing.c +++ b/Python/legacy_tracing.c @@ -586,7 +586,7 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) // needs to be decref'd outside of the lock PyObject *old_traceobj; LOCK_SETUP(); - int tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj); + Py_ssize_t tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj); UNLOCK_SETUP(); Py_XDECREF(old_traceobj); if (tracing_threads < 0) { diff --git a/Tools/jit/template.c b/Tools/jit/template.c index b195aff377b3b5..228dc83254d678 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -12,6 +12,7 @@ #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" #include "pycore_optimizer.h" +#include "pycore_pyatomic_ft_wrappers.h" #include "pycore_range.h" #include "pycore_setobject.h" #include "pycore_sliceobject.h" From 65ff505212bb7369bd94cfab357d28a4a49ec6a1 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 4 Apr 2024 15:51:50 -0700 Subject: [PATCH 14/19] Just use _Py_atomic_exchange_ptr instead of FT macro --- Include/internal/pycore_pyatomic_ft_wrappers.h | 12 ------------ Python/instrumentation.c | 5 ++--- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index a16855729bf0fc..fed5d6e0ec2c54 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -40,8 +40,6 @@ extern "C" { _Py_atomic_store_ssize_relaxed(&value, new_value) #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ _Py_atomic_store_uint8_relaxed(&value, new_value) -#define FT_ATOMIC_EXCHANGE_PYOBJECT(value, new_value) \ - _Py_atomic_exchange_ptr(&value, new_value) #else #define FT_ATOMIC_LOAD_PTR(value) value @@ -55,16 +53,6 @@ extern "C" { #define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value -#define FT_ATOMIC_EXCHANGE_PYOBJECT(value, new_value) \ - _atomic_exchange_pyobject_withgil(&value, new_value) - -static inline PyObject * -_atomic_exchange_pyobject_withgil(PyObject **src, PyObject *new_value) -{ - PyObject *res = *src; - *src = new_value; - return res; -} #endif diff --git a/Python/instrumentation.c b/Python/instrumentation.c index f52921ace7bbeb..71efeff077633d 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1374,9 +1374,8 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj) PyInterpreterState *is = _PyInterpreterState_GET(); assert(0 <= tool_id && 
tool_id < PY_MONITORING_TOOL_IDS); assert(0 <= event_id && event_id < _PY_MONITORING_EVENTS); - PyObject *callback = FT_ATOMIC_EXCHANGE_PYOBJECT(is->monitoring_callables[tool_id][event_id], - Py_XNewRef(obj) - ); + PyObject *callback = _Py_atomic_exchange_ptr(&is->monitoring_callables[tool_id][event_id], + Py_XNewRef(obj)); return callback; } From 6e1960151ce3a7469567c6973e2871f2f6628336 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 18 Apr 2024 12:41:00 -0700 Subject: [PATCH 15/19] Use FT_ATOMIC_LOAD_UINTPTR_ACQUIRE for INSTRUMENTED_RESUME too --- Python/bytecodes.c | 2 +- Python/generated_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a64098dd8dacb4..c1fbd3c7d26e01 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -180,7 +180,7 @@ dummy_func( inst(INSTRUMENTED_RESUME, (--)) { uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; - uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { ERROR_NO_POP(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7d3c6ea673fd11..a426d9e208492e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3274,7 +3274,7 @@ next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_RESUME); uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; - uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { goto error; From 8860a7f22affaeade880a0dd9507ced4e71233da Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Fri, 19 Apr 2024 09:48:01 -0700 Subject: [PATCH 16/19] Handle dict replacement case better --- Objects/dictobject.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 436e98aa222e59..198a753f1b7e08 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7048,17 +7048,9 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) Py_BEGIN_CRITICAL_SECTION2(dict, obj); -#ifdef Py_DEBUG - // If the dict in the object has been replaced between when we got - // the dict and unlocked the objects then it's definitely no longer - // inline and there's no need to detach it, we can just replace it. - // The call to _PyDict_DetachFromObject will be a nop. - PyDictObject *cur_dict = _PyObject_ManagedDictPointer(obj)->dict; - assert(cur_dict == dict || - (cur_dict->ma_values != _PyObject_InlineValues(obj) && - dict->ma_values != _PyObject_InlineValues(obj) && - !_PyObject_InlineValues(obj)->valid)); -#endif + // We've locked dict, but the actual dict could have changed + // since we locked it. 
+ dict = _PyObject_ManagedDictPointer(obj)->dict; FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, (PyDictObject *)Py_XNewRef(new_dict)); @@ -7067,7 +7059,7 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) Py_END_CRITICAL_SECTION2(); - Py_DECREF(dict); + Py_XDECREF(dict); } else { PyDictObject *dict; @@ -7095,21 +7087,25 @@ PyObject_ClearManagedDict(PyObject *obj) int _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) { - _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); - if (mp->ma_values == NULL || mp->ma_values != _PyObject_InlineValues(obj)) { + if (FT_ATOMIC_LOAD_PTR_RELAXED(mp->ma_values) != _PyObject_InlineValues(obj)) { return 0; } + + // We could be called with an unlocked dict when the caller knows the + // values are already detached, so we assert after inline values check. + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); assert(mp->ma_values->embedded == 1); assert(mp->ma_values->valid == 1); assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_INLINE_VALUES); - mp->ma_values = copy_values(mp->ma_values); + PyDictValues *values = copy_values(mp->ma_values); - if (mp->ma_values == NULL) { + if (values == NULL) { return -1; } + mp->ma_values = values; FT_ATOMIC_STORE_UINT8(_PyObject_InlineValues(obj)->valid, 0); From 4b852550a56855eb888bfdd959f32f1d4b004657 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Fri, 19 Apr 2024 11:21:11 -0700 Subject: [PATCH 17/19] Decref materialized dict on fail to set --- Objects/dictobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 198a753f1b7e08..879ac4d71a0620 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -6734,6 +6734,7 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, dict = make_dict_from_instance_attributes(PyInterpreterState_Get(), keys, values); if (dict == NULL || set_or_del_lock_held(dict, name, value) < 0) { + Py_XDECREF(dict); return -1; } From 30a4133bf76142e0959da138936e3cbce65c0926 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 12 Apr 2024 17:06:38 +0000 Subject: [PATCH 18/19] gh-117783: Immortalize objects that use deferred reference counting Deferred reference counting is not fully implemented yet. As a temporary measure, we immortalize objects that would use deferred reference counting to avoid multi-threaded scaling bottlenecks. This is only performed in the free-threaded build once the first non-main thread is started. Additionally, some tests, including refleak tests, suppress this behavior. --- Include/internal/pycore_gc.h | 17 +++++++++++++++++ Lib/test/libregrtest/main.py | 8 ++++++-- Lib/test/libregrtest/single.py | 5 ++++- Lib/test/support/__init__.py | 19 +++++++++++++++++++ Lib/test/test_capi/test_watchers.py | 6 +++++- Lib/test/test_code.py | 6 +++++- Lib/test/test_functools.py | 1 + Lib/test/test_weakref.py | 5 ++++- Modules/_testinternalcapi.c | 22 ++++++++++++++++++++++ Objects/object.c | 7 +++++++ Python/gc_free_threading.c | 28 ++++++++++++++++++++++++++++ Python/pylifecycle.c | 17 +++++++++++++++++ Python/pystate.c | 9 +++++++++ 13 files changed, 144 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 60020b5c01f8a6..dfc68965df6594 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -312,6 +312,18 @@ struct _gc_runtime_state { collections, and are awaiting to undergo a full collection for the first time. 
*/ Py_ssize_t long_lived_pending; + + /* gh-117783: Deferred reference counting is not fully implemented yet, so + as a temporary measure we treat objects using deferred referenence + counting as immortal. */ + struct { + /* Immortalize objects instead of marking them as using deferred + reference counting. */ + int enabled; + + /* Set enabled=1 when the first background thread is created. */ + int enable_on_thread; + } immortalize; #endif }; @@ -343,6 +355,11 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp); extern void _Py_ScheduleGC(PyThreadState *tstate); extern void _Py_RunGC(PyThreadState *tstate); +#ifdef Py_GIL_DISABLED +// gh-117783: Immortalize objects that use deferred reference counting +extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp); +#endif + #ifdef __cplusplus } #endif diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index 3c9d9620053355..9e7a7d60880091 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -7,7 +7,8 @@ import time import trace -from test.support import os_helper, MS_WINDOWS, flush_std_streams +from test.support import (os_helper, MS_WINDOWS, flush_std_streams, + suppress_immortalization) from .cmdline import _parse_args, Namespace from .findtests import findtests, split_test_packages, list_cases @@ -526,7 +527,10 @@ def _run_tests(self, selected: TestTuple, tests: TestList | None) -> int: if self.num_workers: self._run_tests_mp(runtests, self.num_workers) else: - self.run_tests_sequentially(runtests) + # gh-117783: don't immortalize deferred objects when tracking + # refleaks. Only releveant for the free-threaded build. + with suppress_immortalization(runtests.hunt_refleak): + self.run_tests_sequentially(runtests) coverage = self.results.get_coverage_results() self.display_result(runtests) diff --git a/Lib/test/libregrtest/single.py b/Lib/test/libregrtest/single.py index 235029d8620ff5..fc2f2716ad4ce0 100644 --- a/Lib/test/libregrtest/single.py +++ b/Lib/test/libregrtest/single.py @@ -303,7 +303,10 @@ def run_single_test(test_name: TestName, runtests: RunTests) -> TestResult: result = TestResult(test_name) pgo = runtests.pgo try: - _runtest(result, runtests) + # gh-117783: don't immortalize deferred objects when tracking + # refleaks. Only releveant for the free-threaded build. + with support.suppress_immortalization(runtests.hunt_refleak): + _runtest(result, runtests) except: if not pgo: msg = traceback.format_exc() diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index be3f93ab2e5fd1..83ed7aa5ae43bb 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -515,6 +515,25 @@ def has_no_debug_ranges(): def requires_debug_ranges(reason='requires co_positions / debug_ranges'): return unittest.skipIf(has_no_debug_ranges(), reason) +@contextlib.contextmanager +def suppress_immortalization(suppress=True): + """Suppress immortalization of deferred objects.""" + try: + import _testinternalcapi + except ImportError: + yield + return + + if not suppress: + yield + return + + old_values = _testinternalcapi.set_immortalize_deferred(False) + try: + yield + finally: + _testinternalcapi.set_immortalize_deferred(*old_values) + MS_WINDOWS = (sys.platform == 'win32') # Is not actually used in tests, but is kept for compatibility. 
diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index 8e84d0077c7573..90665a7561b316 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -1,7 +1,9 @@ import unittest from contextlib import contextmanager, ExitStack -from test.support import catch_unraisable_exception, import_helper, gc_collect +from test.support import ( + catch_unraisable_exception, import_helper, + gc_collect, suppress_immortalization) # Skip this test if the _testcapi module isn't available. @@ -382,6 +384,7 @@ def assert_event_counts(self, exp_created_0, exp_destroyed_0, self.assertEqual( exp_destroyed_1, _testcapi.get_code_watcher_num_destroyed_events(1)) + @suppress_immortalization() def test_code_object_events_dispatched(self): # verify that all counts are zero before any watchers are registered self.assert_event_counts(0, 0, 0, 0) @@ -428,6 +431,7 @@ def test_error(self): self.assertIsNone(cm.unraisable.object) self.assertEqual(str(cm.unraisable.exc_value), "boom!") + @suppress_immortalization() def test_dealloc_error(self): co = _testcapi.code_newempty("test_watchers", "dummy0", 0) with self.code_watcher(2): diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index fe8c672e71a7b5..aa793f56225393 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -141,7 +141,8 @@ ctypes = None from test.support import (cpython_only, check_impl_detail, requires_debug_ranges, - gc_collect, Py_GIL_DISABLED) + gc_collect, Py_GIL_DISABLED, + suppress_immortalization) from test.support.script_helper import assert_python_ok from test.support import threading_helper, import_helper from test.support.bytecode_helper import instructions_with_positions @@ -577,6 +578,7 @@ def test_interned_string_with_null(self): class CodeWeakRefTest(unittest.TestCase): + @suppress_immortalization() def test_basic(self): # Create a code object in a clean environment so that we know we have # the only reference to it left. @@ -827,6 +829,7 @@ def test_bad_index(self): self.assertEqual(GetExtra(f.__code__, FREE_INDEX+100, ctypes.c_voidp(100)), 0) + @suppress_immortalization() def test_free_called(self): # Verify that the provided free function gets invoked # when the code object is cleaned up. @@ -854,6 +857,7 @@ def test_get_set(self): del f @threading_helper.requires_working_threading() + @suppress_immortalization() def test_free_different_thread(self): # Freeing a code object on a different thread then # where the co_extra was set should be safe. diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index ec5f6af5e17842..bb4c7cc8701fb4 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -1833,6 +1833,7 @@ def f(): return 1 self.assertEqual(f.cache_parameters(), {'maxsize': 1000, "typed": True}) + @support.suppress_immortalization() def test_lru_cache_weakrefable(self): @self.module.lru_cache def test_function(x): diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 499ba77fd19542..fa35209e34ae24 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -12,7 +12,7 @@ import random from test import support -from test.support import script_helper, ALWAYS_EQ +from test.support import script_helper, ALWAYS_EQ, suppress_immortalization from test.support import gc_collect from test.support import import_helper from test.support import threading_helper @@ -650,6 +650,7 @@ class C(object): # deallocation of c2. 
del c2 + @suppress_immortalization() def test_callback_in_cycle(self): import gc @@ -742,6 +743,7 @@ class D: del c1, c2, C, D gc.collect() + @suppress_immortalization() def test_callback_in_cycle_resurrection(self): import gc @@ -877,6 +879,7 @@ def test_init(self): # No exception should be raised here gc.collect() + @suppress_immortalization() def test_classes(self): # Check that classes are weakrefable. class A(object): diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index cc9e1403f87ecd..24ffa412e28357 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1936,6 +1936,27 @@ get_py_thread_id(PyObject *self, PyObject *Py_UNUSED(ignored)) } #endif +static PyObject * +set_immortalize_deferred(PyObject *self, PyObject *value) +{ +#ifdef Py_GIL_DISABLED + PyInterpreterState *interp = PyInterpreterState_Get(); + int old_enabled = interp->gc.immortalize.enabled; + int old_enabled_on_thread = interp->gc.immortalize.enable_on_thread; + int enabled_on_thread = 0; + if (!PyArg_ParseTuple(value, "i|i", + &interp->gc.immortalize.enabled, + &enabled_on_thread)) + { + return NULL; + } + interp->gc.immortalize.enable_on_thread = enabled_on_thread; + return Py_BuildValue("ii", old_enabled, old_enabled_on_thread); +#else + Py_RETURN_FALSE; +#endif +} + static PyObject * has_inline_values(PyObject *self, PyObject *obj) { @@ -2029,6 +2050,7 @@ static PyMethodDef module_functions[] = { #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif + {"set_immortalize_deferred", set_immortalize_deferred, METH_VARARGS}, {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/object.c b/Objects/object.c index 214e7c5b567928..47bc6be71aaa9d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2433,6 +2433,13 @@ _PyObject_SetDeferredRefcount(PyObject *op) assert(PyType_IS_GC(Py_TYPE(op))); assert(_Py_IsOwnedByCurrentThread(op)); assert(op->ob_ref_shared == 0); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->gc.immortalize.enabled) { + // gh-117696: immortalize objects instead of using deferred reference + // counting for now. + _Py_SetImmortal(op); + return; + } op->ob_gc_bits |= _PyGC_BITS_DEFERRED; op->ob_ref_local += 1; op->ob_ref_shared = _Py_REF_QUEUED; diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 9cf0e989d0993f..58632036812572 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -704,6 +704,10 @@ _PyGC_Init(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; + if (_Py_IsMainInterpreter(interp)) { + gcstate->immortalize.enable_on_thread = 1; + } + gcstate->garbage = PyList_New(0); if (gcstate->garbage == NULL) { return _PyStatus_NO_MEMORY(); @@ -1781,6 +1785,30 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } +// gh-117783: Immortalize objects that use deferred reference counting to +// temporarily work around scaling bottlenecks. 
+static bool +immortalize_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op != NULL && _PyObject_HasDeferredRefcount(op)) { + _Py_SetImmortal(op); + op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED; + } + return true; +} + +void +_PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp) +{ + struct visitor_args args; + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &immortalize_visitor, &args); + interp->gc.immortalize.enabled = 1; + _PyEval_StartTheWorld(interp); +} + void PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) { diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index cc1824634e7a7f..7b923e946fcc96 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1491,6 +1491,22 @@ finalize_modules_delete_special(PyThreadState *tstate, int verbose) } } +static void +swap_module_dict(PyModuleObject *mod) +{ + if (_Py_IsImmortal(mod->md_dict)) { + // gh-117783: Immortalizing module dicts can cause some finalizers to + // run much later than typical leading to attribute errors due to + // partially cleared modules. To avoid this, we copy the module dict + // if it was immortalized. + PyObject *copy = PyDict_Copy(mod->md_dict); + if (copy == NULL) { + PyErr_FormatUnraisable("Exception ignored on removing modules"); + return; + } + Py_SETREF(mod->md_dict, copy); + } +} static PyObject* finalize_remove_modules(PyObject *modules, int verbose) @@ -1521,6 +1537,7 @@ finalize_remove_modules(PyObject *modules, int verbose) if (verbose && PyUnicode_Check(name)) { \ PySys_FormatStderr("# cleanup[2] removing %U\n", name); \ } \ + swap_module_dict((PyModuleObject *)mod); \ STORE_MODULE_WEAKREF(name, mod); \ if (PyObject_SetItem(modules, name, Py_None) < 0) { \ PyErr_FormatUnraisable("Exception ignored on removing modules"); \ diff --git a/Python/pystate.c b/Python/pystate.c index 37480df88aeb72..8172fc449af6df 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1567,6 +1567,15 @@ new_threadstate(PyInterpreterState *interp, int whence) // Must be called with lock unlocked to avoid re-entrancy deadlock. PyMem_RawFree(new_tstate); } + else { +#ifdef Py_GIL_DISABLED + if (interp->gc.immortalize.enable_on_thread && !interp->gc.immortalize.enabled) { + // Immortalize objects marked as using deferred reference counting + // the first time a non-main thread is created. + _PyGC_ImmortalizeDeferredObjects(interp); + } +#endif + } #ifdef Py_GIL_DISABLED // Must be called with lock unlocked to avoid lock ordering deadlocks. From 0cab82c1790cf7f2b4e432ba5c38f4e590d960a4 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 19 Apr 2024 19:57:33 +0000 Subject: [PATCH 19/19] Fix set_immortalize_deferred --- Modules/_testinternalcapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 24ffa412e28357..1fcd8dbf05d493 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1953,7 +1953,7 @@ set_immortalize_deferred(PyObject *self, PyObject *value) interp->gc.immortalize.enable_on_thread = enabled_on_thread; return Py_BuildValue("ii", old_enabled, old_enabled_on_thread); #else - Py_RETURN_FALSE; + return Py_BuildValue("OO", Py_False, Py_False); #endif }
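
The LOCK_CODE()/UNLOCK_CODE() pairs that patches 10 and 11 put around remove_tools(), remove_line_tools() and remove_per_instruction_tools() are reached whenever a monitoring callback returns sys.monitoring.DISABLE. A minimal pure-Python sketch of that path using only the public sys.monitoring API (the tool id, tool name and demo() function are arbitrary choices for illustration, not part of these patches):

    import sys

    TOOL_ID = 2  # any unused slot in 0-5 works; 2 is normally the profiler id

    def demo():
        total = 0
        for i in range(3):
            total += i
        return total

    def on_line(code, line_number):
        # Returning DISABLE tells the runtime to stop reporting LINE events for
        # this exact location; in the free-threaded build that request is what
        # ends up in remove_line_tools() under the code object's critical section.
        print(f"{code.co_name}:{line_number}")
        return sys.monitoring.DISABLE

    sys.monitoring.use_tool_id(TOOL_ID, "demo-tool")
    sys.monitoring.register_callback(TOOL_ID, sys.monitoring.events.LINE, on_line)
    sys.monitoring.set_events(TOOL_ID, sys.monitoring.events.LINE)
    try:
        demo()
        demo()  # demo()'s lines were disabled on first hit, so its body reports nothing now
    finally:
        sys.monitoring.set_events(TOOL_ID, sys.monitoring.events.NO_EVENTS)
        sys.monitoring.free_tool_id(TOOL_ID)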
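
Patch 18's test.support.suppress_immortalization() helper is what libregrtest now wraps test runs in while hunting reference leaks. A minimal sketch of using it directly, assuming a free-threaded build with this series applied (probe() and its throwaway thread are illustrative only; starting the first non-main thread is what would otherwise trigger immortalization of deferred-refcount objects):

    import threading
    from test.support import suppress_immortalization

    def probe():
        # The first non-main thread normally flips the interpreter into
        # "immortalize deferred-refcount objects" mode (see the new_threadstate()
        # hunk above); inside the suppression context it stays off.
        t = threading.Thread(target=lambda: None)
        t.start()
        t.join()

    with suppress_immortalization():
        # The helper calls _testinternalcapi.set_immortalize_deferred(False)
        # (when that module is available) and restores the old setting on exit,
        # so functions and code objects keep ordinary reference counts here.
        probe()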