10000 Refactor to avoid potential to lock keys twice, share impl between in… · python/cpython@35cf56e · GitHub
[go: up one dir, main page]

Skip to content

Commit 35cf56e

Browse files
committed
Refactor to avoid potential to lock keys twice, share impl between insertdict and setdefault
1 parent b4d34f1 commit 35cf56e

File tree

1 file changed

+122
-127
lines changed

1 file changed

+122
-127
lines changed

Objects/dictobject.c

Lines changed: 122 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,6 +1187,41 @@ _PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key)
11871187
return unicodekeys_lookup_unicode(dk, key, hash);
11881188
}
11891189

1190+
1191+
#ifdef Py_GIL_DISABLED
1192+
1193+
// Version of _Py_dict_lookup specialized for when we have split keys and the
1194+
// shared keys are locked.
1195+
static Py_ssize_t
1196+
splitdict_lookup_keys_lock_held(PyDictObject *mp, PyObject *key, Py_hash_t hash,
1197+
PyObject **value_addr)
1198+
{
1199+
PyDictKeysObject *dk = mp->ma_keys;
1200+
Py_ssize_t ix;
1201+
1202+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp);
1203+
ASSERT_KEYS_LOCKED(dk);
1204+
assert(PyUnicode_CheckExact(key));
1205+
assert(dk->dk_kind == DICT_KEYS_SPLIT);
1206+
1207+
ix = unicodekeys_lookup_unicode(dk, key, hash);
1208+
1209+
if (ix >= 0) {
1210+
*value_addr = mp->ma_values->values[ix];
1211+
}
1212+
else {
1213+
*value_addr = NULL;
1214+
}
1215+
1216+
return ix;
1217+
}
1218+
1219+
static Py_ssize_t
1220+
unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key,
1221+
Py_hash_t hash);
1222+
1223+
#endif // Py_GIL_DISABLED
1224+
11901225
/*
11911226
The basic lookup function used by all operations.
11921227
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -1216,9 +1251,23 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu
12161251

12171252
if (kind != DICT_KEYS_GENERAL) {
12181253
if (PyUnicode_CheckExact(key)) {
1219-
LOCK_KEYS_IF_SPLIT(dk, kind);
1254+
#ifdef Py_GIL_DISABLED
1255+
if (kind == DICT_KEYS_SPLIT) {
1256+
// A split dictionaries keys can be mutated by other
1257+
// dictionaries but if we have a unicode key we can avoid
1258+
// locking the shared keys.
1259+
ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash);
1260+
if (ix == DKIX_KEY_CHANGED) {
1261+
LOCK_KEYS(dk);
1262+
ix = splitdict_lookup_keys_lock_held(mp, key, hash,
1263+
value_addr);
1264+
UNLOCK_KEYS(dk);
1265+
return ix;
1266+
}
1267+
}
1268+
#endif
1269+
12201270
ix = unicodekeys_lookup_unicode(dk, key, hash);
1221-
UNLOCK_KEYS_IF_SPLIT(dk, kind);
12221271
}
12231272
else {
12241273
INCREF_KEYS_FT(dk);
@@ -1468,7 +1517,6 @@ _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyOb
14681517
Py_DECREF(value);
14691518
goto read_failed;
14701519
}
1471-
14721520
}
14731521
}
14741522
else {
@@ -1690,32 +1738,54 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp,
16901738
return 0;
16911739
}
16921740

1693-
static int
1694-
split_dict_resize_and_insert(PyInterpreterState *interp, PyDictObject *mp,
1695-
Py_hash_t hash, PyObject *key, PyObject *value)
1696-
{
1697-
/* Need to resize. */
1698-
int ins = insertion_resize(interp, mp, 1);
1699-
if (ins < 0) {
1700-
return -1;
1701-
}
1702-
assert(!_PyDict_HasSplitTable(mp));
1703-
return insert_combined_dict(interp, mp, hash, key, value);
1704-
}
1705-
1706-
static int
1741+
// Performs an insert into a split dictionary when the key is not already
1742+
// present. Returns 0 on success, -1 on error, and 1 if a race occured and
1743+
// the key got added by another thread. Consumes key and value references
1744+
// if the key and value are successfully inserted.
1745+
static inline int
17071746
insert_split_dict(PyInterpreterState *interp, PyDictObject *mp,
1708-
Py_hash_t hash, PyObject *key, PyObject *value)
1747+
Py_hash_t hash, PyObject *key, PyObject *value,
1748+
Py_ssize_t *ix, PyObject **result)
17091749
{
1750+
ASSERT_DICT_LOCKED(mp);
1751+
17101752
PyDictKeysObject *keys = mp->ma_keys;
1753+
LOCK_KEYS(keys);
1754+
1755+
#ifdef Py_GIL_DISABLED
1756+
PyObject *existing_value;
1757+
// We could have raced between our lookup before and the insert,
1758+
// so we need to lookup again with the keys locked
1759+
*ix = splitdict_lookup_keys_lock_held(mp, key, hash,
1760+
&existing_value);
1761+
if (*ix >= 0) {
1762+
UNLOCK_KEYS(keys);
1763+
*result = existing_value;
1764+
// There was a race with someone else inserting the key, the
1765+
// caller needs to handle it as they may not want to replace
1766+
// the existing value.
1767+
return 1;
1768+
}
1769+
#endif
1770+
1771+
uint64_t new_version = _PyDict_NotifyEvent(
1772+
interp, PyDict_EVENT_ADDED, mp, key, value);
1773+
keys->dk_version = 0;
17111774
if (keys->dk_usable <= 0) {
1712-
/* Need to resize. */
1775+
UNLOCK_KEYS(keys);
17131776
int ins = insertion_resize(interp, mp, 1);
17141777
if (ins < 0) {
17151778
return -1;
17161779
}
17171780
assert(!_PyDict_HasSplitTable(mp));
1718-
return insert_combined_dict(interp, mp, hash, key, value);
1781+
if (insert_combined_dict(interp, mp, hash, key, value) < 0) {
1782+
*result = NULL;
1783+
return -1;
1784+
}
1785+
1786+
*result = value;
1787+
mp->ma_version_tag = new_version;
1788+
return 0;
17191789
}
17201790

17211791
Py_ssize_t hashpos = find_empty_slot(keys, hash);
@@ -1732,34 +1802,13 @@ insert_split_dict(PyInterpreterState *interp, PyDictObject *mp,
17321802

17331803
split_keys_entry_added(keys);
17341804
assert(keys->dk_usable >= 0);
1735-
return 0;
1736-
}
17371805

1738-
#ifdef Py_GIL_DISABLED
1739-
1740-
static inline Py_ssize_t
1741-
splitdict_lookup_threadsafe(PyDictObject *mp, PyDictKeysObject *dk,
1742-
PyObject *key, Py_ssize_t hash,
1743-
PyObject **value)
1744-
{
1745-
ASSERT_DICT_LOCKED(mp);
1746-
1747-
Py_ssize_t ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash);
1748-
1749-
if (ix >= 0) {
1750-
*value = mp->ma_values->values[ix];
1751-
}
1752-
else if (ix == DKIX_KEY_CHANGED) {
1753-
ix = _Py_dict_lookup(mp, key, hash, value);
1754-
}
1755-
else {
1756-
*value = 0;
1757-
}
1758-
return ix;
1806+
UNLOCK_KEYS(keys);
1807+
*result = value;
1808+
mp->ma_version_tag = new_version;
1809+
return 0;
17591810
}
17601811

1761-
#endif
1762-
17631812
/*
17641813
Internal routine to insert a new item into the table.
17651814
Used both by the internal resize routine and by the public insert routine.
@@ -1780,75 +1829,37 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp,
17801829
assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL);
17811830
}
17821831

1783-
Py_ssize_t ix;
1784-
#ifdef Py_GIL_DISABLED
1785-
PyDictKeysObject *dk = mp->ma_keys;
1786-
DictKeysKind kind = dk->dk_kind;
1787-
1788-
if (kind == DICT_KEYS_SPLIT) {
1789-
// Split keys can be mutated by other dictionaries, so we need
1790-
// to do a threadsafe lookup here. This is basically the split-key
1791-
// half of _Py_dict_lookup_threadsafe and avoids locking the
1792-
// shared keys if we can
1793-
assert(PyUnicode_CheckExact(key));
1794-
ix = splitdict_lookup_threadsafe(mp, dk, key, hash, &old_value);
1795-
} else {
1796-
ix = _Py_dict_lookup(mp, key, hash, &old_value);
1797-
}
1798-
#else
1799-
ix = _Py_dict_lookup(mp, key, hash, &old_value);
1800-
#endif
1832+
Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value);
18011833
if (ix == DKIX_ERROR)
18021834
goto Fail;
18031835

18041836
MAINTAIN_TRACKING(mp, key, value);
18051837

18061838
if (ix == DKIX_EMPTY) {
1807-
uint64_t new_version;
18081839
if (!_PyDict_HasSplitTable(mp)) {
1809-
new_version = _PyDict_NotifyEvent(
1840+
uint64_t new_version = _PyDict_NotifyEvent(
18101841
interp, PyDict_EVENT_ADDED, mp, key, value);
18111842
/* Insert into new slot. */
18121843
mp->ma_keys->dk_version = 0;
18131844
if (insert_combined_dict(interp, mp, hash, key, value) < 0) {
18141845
goto Fail;
18151846
}
1847+
mp->ma_version_tag = new_version;
18161848
}
18171849
else {
1818-
LOCK_KEYS(mp->ma_keys);
1819-
1850+
int res = insert_split_dict(interp, mp, hash, key,
1851+
value, &ix, &old_value);
1852+
if (res < 0) {
1853+
return -1;
1854+
}
18201855
#ifdef Py_GIL_DISABLED
1821-
// We could have raced between our lookup before and the insert,
1822-
// so we need to lookup again with the keys locked
1823-
ix = splitdict_lookup_threadsafe(mp, dk, key, hash, &old_value);
1824-
if (ix >= 0) {
1825-
UNLOCK_KEYS(mp->ma_keys);
1856+
else if (res == 1) {
18261857
goto insert_on_split_race;
18271858
}
18281859
#endif
1829-
new_version = _PyDict_NotifyEvent(
1830-
interp, PyDict_EVENT_ADDED, mp, key, value);
1831-
/* Insert into new slot. */
1832-
mp->ma_keys->dk_version = 0;
1833-
if (mp->ma_keys->dk_usable <= 0) {
1834-
UNLOCK_KEYS(mp->ma_keys);
1835-
1836-
if (split_dict_resize_and_insert(interp, mp, hash, key, value) < 0) {
1837-
goto Fail;
1838-
}
1839-
} else {
1840-
int insert = insert_split_dict(interp, mp, hash, key, value);
1841-
UNLOCK_KEYS(mp->ma_keys);
1842-
1843-
if (insert < 0) {
1844-
goto Fail;
1845-
}
1846-
}
1847-
mp->ma_keys->dk_version = new_version;
18481860
}
18491861

18501862
mp->ma_used++;
1851-
mp->ma_version_tag = new_version;
18521863
ASSERT_CONSISTENT(mp);
18531864
return 0;
18541865
}
@@ -4290,12 +4301,12 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
42904301
}
42914302

42924303
if (ix == DKIX_EMPTY) {
4293-
uint64_t new_version = _PyDict_NotifyEvent(
4294-
interp, PyDict_EVENT_ADDED, mp, key, default_value);
4295-
mp->ma_keys->dk_version = 0;
42964304
value = default_value;
42974305

42984306
if (!_PyDict_HasSplitTable(mp)) {
4307+
uint64_t new_version = _PyDict_NotifyEvent(
4308+
interp, PyDict_EVENT_ADDED, mp, key, default_value);
4309+
mp->ma_keys->dk_version = 0;
42994310
if (insert_combined_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) {
43004311
Py_DECREF(key);
43014312
Py_DECREF(value);
@@ -4304,60 +4315,44 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
43044315
}
43054316
return -1;
43064317
}
4318+
mp->ma_version_tag = new_version;
43074319
}
43084320
else {
4309-
LOCK_KEYS(mp->ma_keys);
4310-
#ifdef Py_GIL_DISABLED
4311-
// We could have raced between our lookup before and the insert,
4312-
// so we need to lookup again with the keys locked
4313-
ix = _Py_dict_lookup(mp, key, hash, &value);
4314-
if (ix >= 0) {
4315-
UNLOCK_KEYS(mp->ma_keys);
4316-
if (value != NULL) {
4317-
if (result) {
4318-
*result = incref_result ? Py_NewRef(value) : value;
4319-
}
4320-
return 0;
4321-
}
4322-
goto insert_on_split_race;
4323-
}
4324-
#endif
4325-
if (mp->ma_keys->dk_usable <= 0) {
4326-
UNLOCK_KEYS(mp->ma_keys);
4327-
4328-
if (split_dict_resize_and_insert(interp, mp, hash, key, value) < 0) {
4329-
return -1;
4330-
}
4331-
} else {
4332-
int insert = insert_split_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value));
4333-
UNLOCK_KEYS(mp->ma_keys);
4334-
4335-
if (insert < 0) {
4336-
Py_DECREF(key);
4337-
Py_DECREF(value);
4321+
int res = insert_split_dict(interp, mp, hash, Py_NewRef(key),
4322+
Py_NewRef(default_value), &ix, &value);
4323+
if (res) {
4324+
Py_DECREF(key);
4325+
Py_DECREF(value);
4326+
if (res < 0) {
43384327
if (result) {
43394328
*result = NULL;
43404329
}
43414330
return -1;
43424331
}
4332+
4333+
#ifdef Py_GIL_DISABLED
4334+
// Raced with another thread inserting
4335+
goto insert_on_split_race;
4336+
#endif
43434337
}
43444338
}
43454339

43464340
MAINTAIN_TRACKING(mp, key, value);
43474341
mp->ma_used++;
4348-
mp->ma_version_tag = new_version;
43494342
assert(mp->ma_keys->dk_usable >= 0);
43504343
ASSERT_CONSISTENT(mp);
43514344
if (result) {
43524345
*result = incref_result ? Py_NewRef(value) : value;
43534346
}
43544347
return 0;
43554348
}
4356-
else if (value == NULL) {
4349+
43574350
#ifdef Py_GIL_DISABLED
43584351
insert_on_split_race:
43594352
#endif
4360-
uint64_t new_version = _PyDict_NotifyEvent(
4353+
if (value == NULL) {
4354+
uint64_t new_version;
4355+
new_version = _PyDict_NotifyEvent(
43614356
interp, PyDict_EVENT_ADDED, mp, key, default_value);
43624357
value = default_value;
43634358
assert(_PyDict_HasSplitTable(mp));

0 commit comments

Comments
 (0)
0