From 7e75eb23ebd5e7d12530e4fc3c799e8987b8bed8 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Wed, 3 Apr 2024 16:48:24 -0700 Subject: [PATCH 1/8] Lock shared keys in `Py_dict_lookup` and use thread-safe lookup in `insertdict` --- Objects/dictobject.c | 59 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index afcf535f8c0a78..4c3870b740bcf6 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -162,6 +162,16 @@ ASSERT_DICT_LOCKED(PyObject *op) assert(_Py_IsOwnedByCurrentThread((PyObject *)mp) || IS_DICT_SHARED(mp)); #define LOAD_KEYS_NENTRIES(d) +#define LOCK_KEYS_IF_SPLIT(keys, kind) \ + if (kind == DICT_KEYS_SPLIT) { \ + LOCK_KEYS(dk); \ + } + +#define UNLOCK_KEYS_IF_SPLIT(keys, kind) \ + if (kind == DICT_KEYS_SPLIT) { \ + UNLOCK_KEYS(dk); \ + } + static inline Py_ssize_t load_keys_nentries(PyDictObject *mp) { @@ -195,6 +205,9 @@ set_values(PyDictObject *mp, PyDictValues *values) #define DECREF_KEYS(dk) _Py_atomic_add_ssize(&dk->dk_refcnt, -1) #define LOAD_KEYS_NENTIRES(keys) _Py_atomic_load_ssize_relaxed(&keys->dk_nentries) +#define INCREF_KEYS_FT(dk) dictkeys_incref(dk) +#define DECREF_KEYS_FT(dk, shared) dictkeys_decref(_PyInterpreterState_GET(), dk, shared) + static inline void split_keys_entry_added(PyDictKeysObject *keys) { ASSERT_KEYS_LOCKED(keys); @@ -216,6 +229,10 @@ static inline void split_keys_entry_added(PyDictKeysObject *keys) #define INCREF_KEYS(dk) dk->dk_refcnt++ #define DECREF_KEYS(dk) dk->dk_refcnt-- #define LOAD_KEYS_NENTIRES(keys) keys->dk_nentries +#define INCREF_KEYS_FT(dk) +#define DECREF_KEYS_FT(dk, shared) +#define LOCK_KEYS_IF_SPLIT(keys, kind) +#define UNLOCK_KEYS_IF_SPLIT(keys, kind) #define IS_DICT_SHARED(mp) (false) #define SET_DICT_SHARED(mp) #define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)(keys->dk_indices))[idx] @@ -1200,10 +1217,18 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu if (kind != DICT_KEYS_GENERAL) { if (PyUnicode_CheckExact(key)) { + LOCK_KEYS_IF_SPLIT(dk, kind); ix = unicodekeys_lookup_unicode(dk, key, hash); + UNLOCK_KEYS_IF_SPLIT(dk, kind); } else { + INCREF_KEYS_FT(dk); + LOCK_KEYS_IF_SPLIT(dk, kind); + ix = unicodekeys_lookup_generic(mp, dk, key, hash); + + UNLOCK_KEYS_IF_SPLIT(dk, kind); + DECREF_KEYS_FT(dk, IS_DICT_SHARED(mp)); if (ix == DKIX_KEY_CHANGED) { goto start; } @@ -1444,6 +1469,7 @@ _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyOb Py_DECREF(value); goto read_failed; } + } } else { @@ -1720,7 +1746,38 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL); } - Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value); + Py_ssize_t ix; +#ifdef Py_GIL_DISABLED + PyDictKeysObject *dk = mp->ma_keys; + DictKeysKind kind = dk->dk_kind; + + if (kind == DICT_KEYS_SPLIT) { + // Split keys can be mutated by other dictionaries, so we need + // to do a threadsafe lookup here. This is basically the split-key + // half of _Py_dict_lookup_threadsafe and avoids locking the + // shared keys if we can + if (PyUnicode_CheckExact(key)) { + ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); + } + else { + ix = unicodekeys_lookup_generic_threadsafe(mp, dk, key, hash); + } + + if (ix >= 0) { + old_value = mp->ma_values->values[ix]; + } + else if (ix == DKIX_KEY_CHANGED) { + ix = _Py_dict_lookup(mp, key, hash, &old_value); + } + else { + old_value = NULL; + } + } else { + ix = _Py_dict_lookup(mp, key, hash, &old_value); + } +#else + ix = _Py_dict_lookup(mp, key, hash, &old_value); +#endif if (ix == DKIX_ERROR) goto Fail; From 5f498f70e431f9a74e050ca6b65c32d3df5abf4e Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 4 Apr 2024 11:23:49 -0700 Subject: [PATCH 2/8] Assert we always have unicode only key in split dict case --- Objects/dictobject.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 4c3870b740bcf6..cefa78c21710db 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1756,12 +1756,8 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, // to do a threadsafe lookup here. This is basically the split-key // half of _Py_dict_lookup_threadsafe and avoids locking the // shared keys if we can - if (PyUnicode_CheckExact(key)) { - ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); - } - else { - ix = unicodekeys_lookup_generic_threadsafe(mp, dk, key, hash); - } + assert(PyUnicode_CheckExact(key)); + ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); if (ix >= 0) { old_value = mp->ma_values->values[ix]; From 90fdea334b6178cef3de24f104d58bd2473b6f0e Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 4 Apr 2024 17:26:01 -0700 Subject: [PATCH 3/8] Re-probe for index with shared keys locked before adding split index --- Objects/dictobject.c | 141 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 113 insertions(+), 28 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index cefa78c21710db..a84697c48dda30 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1691,15 +1691,26 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, return 0; } +static int +split_dict_resize_and_insert(PyInterpreterState *interp, PyDictObject *mp, + Py_hash_t hash, PyObject *key, PyObject *value) +{ + /* Need to resize. */ + int ins = insertion_resize(interp, mp, 1); + if (ins < 0) { + return -1; + } + assert(!_PyDict_HasSplitTable(mp)); + return insert_combined_dict(interp, mp, hash, key, value); +} + static int insert_split_dict(PyInterpreterState *interp, PyDictObject *mp, Py_hash_t hash, PyObject *key, PyObject *value) { PyDictKeysObject *keys = mp->ma_keys; - LOCK_KEYS(keys); if (keys->dk_usable <= 0) { /* Need to resize. */ - UNLOCK_KEYS(keys); int ins = insertion_resize(interp, mp, 1); if (ins < 0) { return -1; @@ -1722,10 +1733,34 @@ insert_split_dict(PyInterpreterState *interp, PyDictObject *mp, split_keys_entry_added(keys); assert(keys->dk_usable >= 0); - UNLOCK_KEYS(keys); return 0; } +#ifdef Py_GIL_DISABLED + +static inline Py_ssize_t +splitdict_lookup_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, + PyObject *key, Py_ssize_t hash, + PyObject **value) +{ + ASSERT_DICT_LOCKED(mp); + + Py_ssize_t ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); + + if (ix >= 0) { + *value = mp->ma_values->values[ix]; + } + else if (ix == DKIX_KEY_CHANGED) { + ix = _Py_dict_lookup(mp, key, hash, value); + } + else { + *value = 0; + } + return ix; +} + +#endif + /* Internal routine to insert a new item into the table. Used both by the internal resize routine and by the public insert routine. @@ -1757,17 +1792,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, // half of _Py_dict_lookup_threadsafe and avoids locking the // shared keys if we can assert(PyUnicode_CheckExact(key)); - ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); - - if (ix >= 0) { - old_value = mp->ma_values->values[ix]; - } - else if (ix == DKIX_KEY_CHANGED) { - ix = _Py_dict_lookup(mp, key, hash, &old_value); - } - else { - old_value = NULL; - } + ix = splitdict_lookup_threadsafe(mp, dk, key, hash, &old_value); } else { ix = _Py_dict_lookup(mp, key, hash, &old_value); } @@ -1780,20 +1805,47 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, MAINTAIN_TRACKING(mp, key, value); if (ix == DKIX_EMPTY) { - uint64_t new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, value); - /* Insert into new slot. */ - mp->ma_keys->dk_version = 0; - assert(old_value == NULL); - + uint64_t new_version; if (!_PyDict_HasSplitTable(mp)) { + new_version = _PyDict_NotifyEvent( + interp, PyDict_EVENT_ADDED, mp, key, value); + /* Insert into new slot. */ + mp->ma_keys->dk_version = 0; if (insert_combined_dict(interp, mp, hash, key, value) < 0) { goto Fail; } } else { - if (insert_split_dict(interp, mp, hash, key, value) < 0) - goto Fail; + LOCK_KEYS(mp->ma_keys); + +#ifdef Py_GIL_DISABLED + // We could have raced between our lookup before and the insert, + // so we need to lookup again with the keys locked + ix = splitdict_lookup_threadsafe(mp, dk, key, hash, &old_value); + if (ix >= 0) { + UNLOCK_KEYS(mp->ma_keys); + goto insert_on_split_race; + } +#endif + new_version = _PyDict_NotifyEvent( + interp, PyDict_EVENT_ADDED, mp, key, value); + /* Insert into new slot. */ + mp->ma_keys->dk_version = 0; + if (mp->ma_keys->dk_usable <= 0) { + UNLOCK_KEYS(mp->ma_keys); + + if (split_dict_resize_and_insert(interp, mp, hash, key, value) < 0) { + goto Fail; + } + } else { + int insert = insert_split_dict(interp, mp, hash, key, value); + UNLOCK_KEYS(mp->ma_keys); + + if (insert < 0) { + goto Fail; + } + } + mp->ma_keys->dk_version = new_version; } mp->ma_used++; @@ -1802,6 +1854,9 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, return 0; } +#ifdef Py_GIL_DISABLED +insert_on_split_race: +#endif if (old_value != value) { uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_MODIFIED, mp, key, value); @@ -4255,13 +4310,40 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } } else { - if (insert_split_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) { - Py_DECREF(key); - Py_DECREF(value); - if (result) { - *result = NULL; + LOCK_KEYS(mp->ma_keys); +#ifdef Py_GIL_DISABLED + // We could have raced between our lookup before and the insert, + // so we need to lookup again with the keys locked + ix = _Py_dict_lookup(mp, key, hash, &value); + if (ix >= 0) { + UNLOCK_KEYS(mp->ma_keys); + if (value != NULL) { + if (result) { + *result = incref_result ? Py_NewRef(value) : value; + } + return 0; + } + goto insert_on_split_race; + } +#endif + if (mp->ma_keys->dk_usable <= 0) { + UNLOCK_KEYS(mp->ma_keys); + + if (split_dict_resize_and_insert(interp, mp, hash, key, value) < 0) { + return -1; + } + } else { + int insert = insert_split_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)); + UNLOCK_KEYS(mp->ma_keys); + + if (insert < 0) { + Py_DECREF(key); + Py_DECREF(value); + if (result) { + *result = NULL; + } + return -1; } - return -1; } } @@ -4276,6 +4358,9 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu return 0; } else if (value == NULL) { +#ifdef Py_GIL_DISABLED +insert_on_split_race: +#endif uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, default_value); value = default_value; From 1272d682df4b096122e3d1dc247f641c61a385a4 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 15 Apr 2024 10:17:39 -0700 Subject: [PATCH 4/8] Refactor to avoid potential to lock keys twice, share impl between insertdict and setdefault --- Objects/dictobject.c | 248 +++++++++++++++++++++---------------------- 1 file changed, 122 insertions(+), 126 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index a84697c48dda30..835ffa6689909b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1188,6 +1188,41 @@ _PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key) return unicodekeys_lookup_unicode(dk, key, hash); } + +#ifdef Py_GIL_DISABLED + +// Version of _Py_dict_lookup specialized for when we have split keys and the +// shared keys are locked. +static Py_ssize_t +splitdict_lookup_keys_lock_held(PyDictObject *mp, PyObject *key, Py_hash_t hash, + PyObject **value_addr) +{ + PyDictKeysObject *dk = mp->ma_keys; + Py_ssize_t ix; + + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); + ASSERT_KEYS_LOCKED(dk); + assert(PyUnicode_CheckExact(key)); + assert(dk->dk_kind == DICT_KEYS_SPLIT); + + ix = unicodekeys_lookup_unicode(dk, key, hash); + + if (ix >= 0) { + *value_addr = mp->ma_values->values[ix]; + } + else { + *value_addr = NULL; + } + + return ix; +} + +static Py_ssize_t +unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key, + Py_hash_t hash); + +#endif // Py_GIL_DISABLED + /* The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. @@ -1217,9 +1252,23 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu if (kind != DICT_KEYS_GENERAL) { if (PyUnicode_CheckExact(key)) { - LOCK_KEYS_IF_SPLIT(dk, kind); +#ifdef Py_GIL_DISABLED + if (kind == DICT_KEYS_SPLIT) { + // A split dictionaries keys can be mutated by other + // dictionaries but if we have a unicode key we can avoid + // locking the shared keys. + ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); + if (ix == DKIX_KEY_CHANGED) { + LOCK_KEYS(dk); + ix = splitdict_lookup_keys_lock_held(mp, key, hash, + value_addr); + UNLOCK_KEYS(dk); + return ix; + } + } +#endif + ix = unicodekeys_lookup_unicode(dk, key, hash); - UNLOCK_KEYS_IF_SPLIT(dk, kind); } else { INCREF_KEYS_FT(dk); @@ -1469,7 +1518,6 @@ _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyOb Py_DECREF(value); goto read_failed; } - } } else { @@ -1691,32 +1739,55 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, return 0; } -static int -split_dict_resize_and_insert(PyInterpreterState *interp, PyDictObject *mp, - Py_hash_t hash, PyObject *key, PyObject *value) -{ - /* Need to resize. */ - int ins = insertion_resize(interp, mp, 1); - if (ins < 0) { - return -1; - } - assert(!_PyDict_HasSplitTable(mp)); - return insert_combined_dict(interp, mp, hash, key, value); -} - -static int +// Performs an insert into a split dictionary when the key is not already +// present. Returns 0 on success, -1 on error, and 1 if a race occured and +// the key got added by another thread. Consumes key and value references +// if the key and value are successfully inserted. +static inline int insert_split_dict(PyInterpreterState *interp, PyDictObject *mp, - Py_hash_t hash, PyObject *key, PyObject *value) + Py_hash_t hash, PyObject *key, PyObject *value, + Py_ssize_t *ix, PyObject **result) { + ASSERT_DICT_LOCKED(mp); + PyDictKeysObject *keys = mp->ma_keys; + LOCK_KEYS(keys); + +#ifdef Py_GIL_DISABLED + PyObject *existing_value; + // We could have raced between our lookup before and the insert, + // so we need to lookup again with the keys locked + *ix = splitdict_lookup_keys_lock_held(mp, key, hash, + &existing_value); + if (*ix >= 0) { + UNLOCK_KEYS(keys); + *result = existing_value; + // There was a race with someone else inserting the key, the + // caller needs to handle it as they may not want to replace + // the existing value. + return 1; + } +#endif + + uint64_t new_version = _PyDict_NotifyEvent( + interp, PyDict_EVENT_ADDED, mp, key, value); + keys->dk_version = 0; if (keys->dk_usable <= 0) { /* Need to resize. */ + UNLOCK_KEYS(keys); int ins = insertion_resize(interp, mp, 1); if (ins < 0) { return -1; } assert(!_PyDict_HasSplitTable(mp)); - return insert_combined_dict(interp, mp, hash, key, value); + if (insert_combined_dict(interp, mp, hash, key, value) < 0) { + *result = NULL; + return -1; + } + + *result = value; + mp->ma_version_tag = new_version; + return 0; } Py_ssize_t hashpos = find_empty_slot(keys, hash); @@ -1733,34 +1804,13 @@ insert_split_dict(PyInterpreterState *interp, PyDictObject *mp, split_keys_entry_added(keys); assert(keys->dk_usable >= 0); - return 0; -} -#ifdef Py_GIL_DISABLED - -static inline Py_ssize_t -splitdict_lookup_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, - PyObject *key, Py_ssize_t hash, - PyObject **value) -{ - ASSERT_DICT_LOCKED(mp); - - Py_ssize_t ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); - - if (ix >= 0) { - *value = mp->ma_values->values[ix]; - } - else if (ix == DKIX_KEY_CHANGED) { - ix = _Py_dict_lookup(mp, key, hash, value); - } - else { - *value = 0; - } - return ix; + UNLOCK_KEYS(keys); + *result = value; + mp->ma_version_tag = new_version; + return 0; } -#endif - /* Internal routine to insert a new item into the table. Used both by the internal resize routine and by the public insert routine. @@ -1781,75 +1831,37 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL); } - Py_ssize_t ix; -#ifdef Py_GIL_DISABLED - PyDictKeysObject *dk = mp->ma_keys; - DictKeysKind kind = dk->dk_kind; - - if (kind == DICT_KEYS_SPLIT) { - // Split keys can be mutated by other dictionaries, so we need - // to do a threadsafe lookup here. This is basically the split-key - // half of _Py_dict_lookup_threadsafe and avoids locking the - // shared keys if we can - assert(PyUnicode_CheckExact(key)); - ix = splitdict_lookup_threadsafe(mp, dk, key, hash, &old_value); - } else { - ix = _Py_dict_lookup(mp, key, hash, &old_value); - } -#else - ix = _Py_dict_lookup(mp, key, hash, &old_value); -#endif + Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value); if (ix == DKIX_ERROR) goto Fail; MAINTAIN_TRACKING(mp, key, value); if (ix == DKIX_EMPTY) { - uint64_t new_version; if (!_PyDict_HasSplitTable(mp)) { - new_version = _PyDict_NotifyEvent( + uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, value); /* Insert into new slot. */ mp->ma_keys->dk_version = 0; if (insert_combined_dict(interp, mp, hash, key, value) < 0) { goto Fail; } + mp->ma_version_tag = new_version; } else { - LOCK_KEYS(mp->ma_keys); - + int res = insert_split_dict(interp, mp, hash, key, + value, &ix, &old_value); + if (res < 0) { + return -1; + } #ifdef Py_GIL_DISABLED - // We could have raced between our lookup before and the insert, - // so we need to lookup again with the keys locked - ix = splitdict_lookup_threadsafe(mp, dk, key, hash, &old_value); - if (ix >= 0) { - UNLOCK_KEYS(mp->ma_keys); + else if (res == 1) { goto insert_on_split_race; } #endif - new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, value); - /* Insert into new slot. */ - mp->ma_keys->dk_version = 0; - if (mp->ma_keys->dk_usable <= 0) { - UNLOCK_KEYS(mp->ma_keys); - - if (split_dict_resize_and_insert(interp, mp, hash, key, value) < 0) { - goto Fail; - } - } else { - int insert = insert_split_dict(interp, mp, hash, key, value); - UNLOCK_KEYS(mp->ma_keys); - - if (insert < 0) { - goto Fail; - } - } - mp->ma_keys->dk_version = new_version; } mp->ma_used++; - mp->ma_version_tag = new_version; ASSERT_CONSISTENT(mp); return 0; } @@ -4294,12 +4306,12 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } if (ix == DKIX_EMPTY) { - uint64_t new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, default_value); - mp->ma_keys->dk_version = 0; value = default_value; if (!_PyDict_HasSplitTable(mp)) { + uint64_t new_version = _PyDict_NotifyEvent( + interp, PyDict_EVENT_ADDED, mp, key, default_value); + mp->ma_keys->dk_version = 0; if (insert_combined_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) { Py_DECREF(key); Py_DECREF(value); @@ -4308,48 +4320,30 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } return -1; } + mp->ma_version_tag = new_version; } else { - LOCK_KEYS(mp->ma_keys); -#ifdef Py_GIL_DISABLED - // We could have raced between our lookup before and the insert, - // so we need to lookup again with the keys locked - ix = _Py_dict_lookup(mp, key, hash, &value); - if (ix >= 0) { - UNLOCK_KEYS(mp->ma_keys); - if (value != NULL) { - if (result) { - *result = incref_result ? Py_NewRef(value) : value; - } - return 0; - } - goto insert_on_split_race; - } -#endif - if (mp->ma_keys->dk_usable <= 0) { - UNLOCK_KEYS(mp->ma_keys); - - if (split_dict_resize_and_insert(interp, mp, hash, key, value) < 0) { - return -1; - } - } else { - int insert = insert_split_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)); - UNLOCK_KEYS(mp->ma_keys); - - if (insert < 0) { - Py_DECREF(key); - Py_DECREF(value); + int res = insert_split_dict(interp, mp, hash, Py_NewRef(key), + Py_NewRef(default_value), &ix, &value); + if (res) { + Py_DECREF(key); + Py_DECREF(value); + if (res < 0) { if (result) { *result = NULL; } return -1; } + +#ifdef Py_GIL_DISABLED + // Raced with another thread inserting + goto insert_on_split_race; +#endif } } MAINTAIN_TRACKING(mp, key, value); mp->ma_used++; - mp->ma_version_tag = new_version; assert(mp->ma_keys->dk_usable >= 0); ASSERT_CONSISTENT(mp); if (result) { @@ -4357,11 +4351,13 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } return 0; } - else if (value == NULL) { + #ifdef Py_GIL_DISABLED insert_on_split_race: #endif - uint64_t new_version = _PyDict_NotifyEvent( + if (value == NULL) { + uint64_t new_version; + new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, default_value); value = default_value; assert(_PyDict_HasSplitTable(mp)); From 002ca044235abfc9459c3d2eea6b3dd28011c218 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 8 Apr 2024 22:17:24 +0000 Subject: [PATCH 5/8] gh-112075: Lock when inserting into shared keys --- Objects/dictobject.c | 303 ++++++++++++++++--------------------------- 1 file changed, 113 insertions(+), 190 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 835ffa6689909b..f877e7c4b1a151 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1681,31 +1681,6 @@ insertion_resize(PyInterpreterState *interp, PyDictObject *mp, int unicode) return dictresize(interp, mp, calculate_log2_keysize(GROWTH_RATE(mp)), unicode); } -static Py_ssize_t -insert_into_splitdictkeys(PyDictKeysObject *keys, PyObject *name, Py_hash_t hash) -{ - assert(PyUnicode_CheckExact(name)); - ASSERT_KEYS_LOCKED(keys); - - Py_ssize_t ix = unicodekeys_lookup_unicode(keys, name, hash); - if (ix == DKIX_EMPTY) { - if (keys->dk_usable <= 0) { - return DKIX_EMPTY; - } - /* Insert into new slot. */ - keys->dk_version = 0; - Py_ssize_t hashpos = find_empty_slot(keys, hash); - ix = keys->dk_nentries; - PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(keys)[ix]; - dictkeys_set_index(keys, hashpos, ix); - assert(ep->me_key == NULL); - ep->me_key = Py_NewRef(name); - split_keys_entry_added(keys); - } - assert (ix < SHARED_KEYS_MAX_SIZE); - return ix; -} - static inline int insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, Py_hash_t hash, PyObject *key, PyObject *value) @@ -1739,76 +1714,57 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, return 0; } -// Performs an insert into a split dictionary when the key is not already -// present. Returns 0 on success, -1 on error, and 1 if a race occured and -// the key got added by another thread. Consumes key and value references -// if the key and value are successfully inserted. -static inline int -insert_split_dict(PyInterpreterState *interp, PyDictObject *mp, - Py_hash_t hash, PyObject *key, PyObject *value, - Py_ssize_t *ix, PyObject **result) +static int +insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) { - ASSERT_DICT_LOCKED(mp); + assert(PyUnicode_CheckExact(key)); + Py_ssize_t ix; - PyDictKeysObject *keys = mp->ma_keys; - LOCK_KEYS(keys); #ifdef Py_GIL_DISABLED - PyObject *existing_value; - // We could have raced between our lookup before and the insert, - // so we need to lookup again with the keys locked - *ix = splitdict_lookup_keys_lock_held(mp, key, hash, - &existing_value); - if (*ix >= 0) { - UNLOCK_KEYS(keys); - *result = existing_value; - // There was a race with someone else inserting the key, the - // caller needs to handle it as they may not want to replace - // the existing value. - return 1; + ix = unicodekeys_lookup_unicode_threadsafe(keys, key, hash); + if (ix >= 0) { + return ix; } #endif - uint64_t new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, value); - keys->dk_version = 0; - if (keys->dk_usable <= 0) { - /* Need to resize. */ - UNLOCK_KEYS(keys); - int ins = insertion_resize(interp, mp, 1); - if (ins < 0) { - return -1; - } - assert(!_PyDict_HasSplitTable(mp)); - if (insert_combined_dict(interp, mp, hash, key, value) < 0) { - *result = NULL; - return -1; - } + LOCK_KEYS(keys); + ix = unicodekeys_lookup_unicode(keys, key, hash); + if (ix == DKIX_EMPTY && keys->dk_usable > 0) { + // Insert into new slot + keys->dk_version = 0; + Py_ssize_t hashpos = find_empty_slot(keys, hash); + ix = keys->dk_nentries; + dictkeys_set_index(keys, hashpos, ix); + PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(keys)[ix]; + STORE_SHARED_KEY(ep->me_key, Py_NewRef(key)); + split_keys_entry_added(keys); + } + assert (ix < SHARED_KEYS_MAX_SIZE); + UNLOCK_KEYS(keys); + return ix; +} - *result = value; +static void +insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key, PyObject *value, Py_ssize_t ix) +{ + assert(PyUnicode_CheckExact(key)); + MAINTAIN_TRACKING(mp, key, value); + PyObject *old_value = mp->ma_values->values[ix]; + if (old_value == NULL) { + uint64_t new_version = _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); + STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value)); + _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); + mp->ma_used++; mp->ma_version_tag = new_version; - return 0; } - - Py_ssize_t hashpos = find_empty_slot(keys, hash); - dictkeys_set_index(keys, hashpos, keys->dk_nentries); - - PyDictUnicodeEntry *ep; - ep = &DK_UNICODE_ENTRIES(keys)[keys->dk_nentries]; - STORE_SHARED_KEY(ep->me_key, key); - - Py_ssize_t index = keys->dk_nentries; - _PyDictValues_AddToInsertionOrder(mp->ma_values, index); - assert (mp->ma_values->values[index] == NULL); - STORE_SPLIT_VALUE(mp, index, value); - - split_keys_entry_added(keys); - assert(keys->dk_usable >= 0); - - UNLOCK_KEYS(keys); - *result = value; - mp->ma_version_tag = new_version; - return 0; + else { + uint64_t new_version = _PyDict_NotifyEvent(interp, PyDict_EVENT_MODIFIED, mp, key, value); + STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value)); + mp->ma_version_tag = new_version; + Py_DECREF(old_value); + } + ASSERT_CONSISTENT(mp); } /* @@ -1831,6 +1787,21 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL); } + if (_PyDict_HasSplitTable(mp)) { + Py_ssize_t ix = insert_split_key(mp->ma_keys, key, hash); + if (ix != DKIX_EMPTY) { + insert_split_value(interp, mp, key, value, ix); + Py_DECREF(key); + Py_DECREF(value); + return 0; + } + + /* No space in shared keys. Resize and continue below. */ + if (insertion_resize(interp, mp, 1) < 0) { + goto Fail; + } + } + Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value); if (ix == DKIX_ERROR) goto Fail; @@ -1838,57 +1809,33 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, MAINTAIN_TRACKING(mp, key, value); if (ix == DKIX_EMPTY) { - if (!_PyDict_HasSplitTable(mp)) { - uint64_t new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, value); - /* Insert into new slot. */ - mp->ma_keys->dk_version = 0; - if (insert_combined_dict(interp, mp, hash, key, value) < 0) { - goto Fail; - } - mp->ma_version_tag = new_version; - } - else { - int res = insert_split_dict(interp, mp, hash, key, - value, &ix, &old_value); - if (res < 0) { - return -1; - } -#ifdef Py_GIL_DISABLED - else if (res == 1) { - goto insert_on_split_race; - } -#endif + assert(!_PyDict_HasSplitTable(mp)); + uint64_t new_version = _PyDict_NotifyEvent( + interp, PyDict_EVENT_ADDED, mp, key, value); + /* Insert into new slot. */ + mp->ma_keys->dk_version = 0; + assert(old_value == NULL); + if (insert_combined_dict(interp, mp, hash, key, value) < 0) { + goto Fail; } - + mp->ma_version_tag = new_version; mp->ma_used++; ASSERT_CONSISTENT(mp); return 0; } -#ifdef Py_GIL_DISABLED -insert_on_split_race: -#endif if (old_value != value) { uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_MODIFIED, mp, key, value); - if (_PyDict_HasSplitTable(mp)) { - STORE_SPLIT_VALUE(mp, ix, value); - if (old_value == NULL) { - _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); - mp->ma_used++; - } + assert(old_value != NULL); + assert(!_PyDict_HasSplitTable(mp)); + if (DK_IS_UNICODE(mp->ma_keys)) { + PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; + STORE_VALUE(ep, value); } else { - assert(old_value != NULL); - if (DK_IS_UNICODE(mp->ma_keys)) { - PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; - STORE_VALUE(ep, value); - } - else { - PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[ix]; - STORE_VALUE(ep, value); - } + PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[ix]; + STORE_VALUE(ep, value); } mp->ma_version_tag = new_version; } @@ -4297,6 +4244,29 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } } + if (_PyDict_HasSplitTable(mp)) { + Py_ssize_t ix = insert_split_key(mp->ma_keys, key, hash); + if (ix != DKIX_EMPTY) { + PyObject *value = mp->ma_values->values[ix]; + int already_present = value != NULL; + if (!already_present) { + insert_split_value(interp, mp, key, default_value, ix); + value = default_value; + } + if (result) { + *result = incref_result ? Py_NewRef(value) : value; + } + return already_present; + } + + /* No space in shared keys. Resize and continue below. */ + if (insertion_resize(interp, mp, 1) < 0) { + goto error; + } + } + + assert(!_PyDict_HasSplitTable(mp)); + Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &value); if (ix == DKIX_ERROR) { if (result) { @@ -4306,67 +4276,24 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } if (ix == DKIX_EMPTY) { + assert(!_PyDict_HasSplitTable(mp)); + uint64_t new_version = _PyDict_NotifyEvent( + interp, PyDict_EVENT_ADDED, mp, key, default_value); + mp->ma_keys->dk_version = 0; value = default_value; - if (!_PyDict_HasSplitTable(mp)) { - uint64_t new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, default_value); - mp->ma_keys->dk_version = 0; - if (insert_combined_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) { - Py_DECREF(key); - Py_DECREF(value); - if (result) { - *result = NULL; - } - return -1; - } - mp->ma_version_tag = new_version; - } - else { - int res = insert_split_dict(interp, mp, hash, Py_NewRef(key), - Py_NewRef(default_value), &ix, &value); - if (res) { - Py_DECREF(key); - Py_DECREF(value); - if (res < 0) { - if (result) { - *result = NULL; - } - return -1; - } - -#ifdef Py_GIL_DISABLED - // Raced with another thread inserting - goto insert_on_split_race; -#endif + if (insert_combined_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) { + Py_DECREF(key); + Py_DECREF(value); + if (result) { + *result = NULL; } } MAINTAIN_TRACKING(mp, key, value); mp->ma_used++; - assert(mp->ma_keys->dk_usable >= 0); - ASSERT_CONSISTENT(mp); - if (result) { - *result = incref_result ? Py_NewRef(value) : value; - } - return 0; - } - -#ifdef Py_GIL_DISABLED -insert_on_split_race: -#endif - if (value == NULL) { - uint64_t new_version; - new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, default_value); - value = default_value; - assert(_PyDict_HasSplitTable(mp)); - assert(mp->ma_values->values[ix] == NULL); - MAINTAIN_TRACKING(mp, key, value); - STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value)); - _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); - mp->ma_used++; mp->ma_version_tag = new_version; + assert(mp->ma_keys->dk_usable >= 0); ASSERT_CONSISTENT(mp); if (result) { *result = incref_result ? Py_NewRef(value) : value; @@ -4374,11 +4301,18 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu return 0; } + assert(value != NULL); ASSERT_CONSISTENT(mp); if (result) { *result = incref_result ? Py_NewRef(value) : value; } return 1; + +error: + if (result) { + *result = NULL; + } + return -1; } int @@ -6833,18 +6767,7 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, assert(hash != -1); } -#ifdef Py_GIL_DISABLED - // Try a thread-safe lookup to see if the index is already allocated - ix = unicodekeys_lookup_unicode_threadsafe(keys, name, hash); - if (ix == DKIX_EMPTY || ix == DKIX_KEY_CHANGED) { - // Lock keys and do insert - LOCK_KEYS(keys); - ix = insert_into_splitdictkeys(keys, name, hash); - UNLOCK_KEYS(keys); - } -#else - ix = insert_into_splitdictkeys(keys, name, hash); -#endif + ix = insert_split_key(keys, name, hash); #ifdef Py_STATS if (ix == DKIX_EMPTY) { From e7d74306a92a7d0b5d586cb20ff76fece98994f2 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 15 Apr 2024 20:06:19 +0000 Subject: [PATCH 6/8] Fix return type --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index f877e7c4b1a151..fc16f8efd87543 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1714,7 +1714,7 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, return 0; } -static int +static Py_ssize_t insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) { assert(PyUnicode_CheckExact(key)); From a74c0ee4049343ad81007350f28d8527f6d99ddd Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 23 Apr 2024 10:48:24 -0700 Subject: [PATCH 7/8] Remove splitdict_lookup_keys_lock_held --- Objects/dictobject.c | 42 ++++++------------------------------------ 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index fc16f8efd87543..0e40cc584e6081 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1188,41 +1188,10 @@ _PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key) return unicodekeys_lookup_unicode(dk, key, hash); } - -#ifdef Py_GIL_DISABLED - -// Version of _Py_dict_lookup specialized for when we have split keys and the -// shared keys are locked. -static Py_ssize_t -splitdict_lookup_keys_lock_held(PyDictObject *mp, PyObject *key, Py_hash_t hash, - PyObject **value_addr) -{ - PyDictKeysObject *dk = mp->ma_keys; - Py_ssize_t ix; - - _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); - ASSERT_KEYS_LOCKED(dk); - assert(PyUnicode_CheckExact(key)); - assert(dk->dk_kind == DICT_KEYS_SPLIT); - - ix = unicodekeys_lookup_unicode(dk, key, hash); - - if (ix >= 0) { - *value_addr = mp->ma_values->values[ix]; - } - else { - *value_addr = NULL; - } - - return ix; -} - static Py_ssize_t unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash); -#endif // Py_GIL_DISABLED - /* The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. @@ -1260,15 +1229,16 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); if (ix == DKIX_KEY_CHANGED) { LOCK_KEYS(dk); - ix = splitdict_lookup_keys_lock_held(mp, key, hash, - value_addr); + ix = unicodekeys_lookup_unicode(dk, key, hash); UNLOCK_KEYS(dk); - return ix; } } -#endif - + else { + ix = unicodekeys_lookup_unicode(dk, key, hash); + } +#else ix = unicodekeys_lookup_unicode(dk, key, hash); +#endif } else { INCREF_KEYS_FT(dk); From d5a5403a79d06f8d2b5e3906ac9b5b751f0e4bb9 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 25 Apr 2024 13:04:38 -0700 Subject: [PATCH 8/8] unicodekeys_lookup_unicode_threadsafe only when gil is disabled --- Objects/dictobject.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 0e40cc584e6081..43cb2350b7fc71 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1188,10 +1188,14 @@ _PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key) return unicodekeys_lookup_unicode(dk, key, hash); } +#ifdef Py_GIL_DISABLED + static Py_ssize_t unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash); +#endif + /* The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.