8000 Refactor to avoid potential to lock keys twice, share impl between in… · python/cpython@d580aa5 · GitHub
[go: up one dir, main page]

Skip to content

Commit d580aa5

Browse files
committed
Refactor to avoid potential to lock keys twice, share impl between insertdict and setdefault
1 parent b4d34f1 commit d580aa5

File tree

1 file changed

+122
-126
lines changed

1 file changed

+122
-126
lines changed

Objects/dictobject.c

Lines changed: 122 additions & 126 deletions
F438
Original file line numberDiff line numberDiff line change
@@ -1187,6 +1187,41 @@ _PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key)
11871187
return unicodekeys_lookup_unicode(dk, key, hash);
11881188
}
11891189

1190+
1191+
#ifdef Py_GIL_DISABLED
1192+
1193+
// Version of _Py_dict_lookup specialized for when we have split keys and the
1194+
// shared keys are locked.
1195+
static Py_ssize_t
1196+
splitdict_lookup_keys_lock_held(PyDictObject *mp, PyObject *key, Py_hash_t hash,
1197+
PyObject **value_addr)
1198+
{
1199+
PyDictKeysObject *dk = mp->ma_keys;
1200+
Py_ssize_t ix;
1201+
1202+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp);
1203+
ASSERT_KEYS_LOCKED(dk);
1204+
assert(PyUnicode_CheckExact(key));
1205+
assert(dk->dk_kind == DICT_KEYS_SPLIT);
1206+
1207+
ix = unicodekeys_lookup_unicode(dk, key, hash);
1208+
1209+
if (ix >= 0) {
1210+
*value_addr = mp->ma_values->values[ix];
1211+
}
1212+
else {
1213+
*value_addr = NULL;
1214+
}
1215+
1216+
return ix;
1217+
}
1218+
1219+
static Py_ssize_t
1220+
unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key,
1221+
Py_hash_t hash);
1222+
1223+
#endif // Py_GIL_DISABLED
1224+
11901225
/*
11911226
The basic lookup function used by all operations.
11921227
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -1216,9 +1251,23 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu
12161251

12171252
if (kind != DICT_KEYS_GENERAL) {
12181253
if (PyUnicode_CheckExact(key)) {
1219-
LOCK_KEYS_IF_SPLIT(dk, kind);
1254+
#ifdef Py_GIL_DISABLED
1255+
if (kind == DICT_KEYS_SPLIT) {
1256+
// A split dictionaries keys can be mutated by other
1257+
// dictionaries but if we have a unicode key we can avoid
1258+
// locking the shared keys.
1259+
ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash);
1260+
if (ix == DKIX_KEY_CHANGED) {
1261+
LOCK_KEYS(dk);
1262+
ix = splitdict_lookup_keys_lock_held(mp, key, hash,
1263+
value_addr);
1264+
UNLOCK_KEYS(dk);
1265+
return ix;
1266+
}
1267+
}
1268+
#endif
1269+
12201270
ix = unicodekeys_lookup_unicode(dk, key, hash);
1221-
UNLOCK_KEYS_IF_SPLIT(dk, kind);
12221271
}
12231272
else {
12241273
INCREF_KEYS_FT(dk);
@@ -1468,7 +1517,6 @@ _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyOb
14681517
Py_DECREF(value);
14691518
goto read_failed;
14701519
}
1471-
14721520
}
14731521
}
14741522
else {
@@ -1690,32 +1738,55 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp,
16901738
return 0;
16911739
}
16921740

1693-
static int
1694-
split_dict_resize_and_insert(PyInterpreterState *interp, PyDictObject *mp,
1695-
Py_hash_t hash, PyObject *key, PyObject *value)
1696-
{
1697-
/* Need to resize. */
1698-
int ins = insertion_resize(interp, mp, 1);
1699-
if (ins < 0) {
1700-
return -1;
1701-
}
1702-
assert(!_PyDict_HasSplitTable(mp));
1703-
return insert_combined_dict(interp, mp, hash, key, value);
1704-
}
1705-
1706-
static int
1741+
// Performs an insert into a split dictionary when the key is not already
1742+
// present. Returns 0 on success, -1 on error, and 1 if a race occured and
1743+
// the key got added by another thread. Consumes key and value references
1744+
// if the key and value are successfully inserted.
1745+
static inline int
17071746
insert_split_dict(PyInterpreterState *interp, PyDictObject *mp,
1708-
Py_hash_t hash, PyObject *key, PyObject *value)
1747+
Py_hash_t hash, PyObject *key, PyObject *value,
1748+
Py_ssize_t *ix, PyObject **result)
17091749
{
1750+
ASSERT_DICT_LOCKED(mp);
1751+
17101752
PyDictKeysObject *keys = mp->ma_keys;
1753+
LOCK_KEYS(keys);
1754+
1755+
#ifdef Py_GIL_DISABLED
1756+
PyObject *existing_value;
1757+
// We could have raced between our lookup before and the insert,
1758+
// so we need to lookup again with the keys locked
1759+
*ix = splitdict_lookup_keys_lock_held(mp, key, hash,
1760+
&existing_value);
1761+
if (*ix >= 0) {
1762+
UNLOCK_KEYS(keys);
1763+
*result = existing_value;
1764+
// There was a race with someone else inserting the key, the
1765+
// caller needs to handle it as they may not want to replace
1766+
// the existing value.
1767+
return 1;
1768+
}
1769+
#endif
1770+
1771+
uint64_t new_version = _PyDict_NotifyEvent(
1772+
interp, PyDict_EVENT_ADDED, mp, key, value);
1773+
keys->dk_version = 0;
17111774
if (keys->dk_usable <= 0) {
17121775
/* Need to resize. */
1776+
UNLOCK_KEYS(keys);
17131777
int ins = insertion_resize(interp, mp, 1);
17141778
if (ins < 0) {
17151779
return -1;
17161780
}
17171781
assert(!_PyDict_HasSplitTable(mp));
1718-
return insert_combined_dict(interp, mp, hash, key, value);
1782+
if (insert_combined_dict(interp, mp, hash, key, value) < 0) {
1783+
*result = NULL;
1784+
return -1;
1785+
}
1786+
1787+
*result = value;
1788+
mp->ma_version_tag = new_version;
1789+
return 0;
17191790
}
17201791

17211792
Py_ssize_t hashpos = find_empty_slot(keys, hash);
@@ -1732,34 +1803,13 @@ insert_split_dict(PyInterpreterState *interp, PyDictObject *mp,
17321803

17331804
split_keys_entry_added(keys);
17341805
assert(keys->dk_usable >= 0);
1735-
return 0;
1736-
}
17371806

1738-
#ifdef Py_GIL_DISABLED
1739-
1740-
static inline Py_ssize_t
1741-
splitdict_lookup_threadsafe(PyDictObject *mp, PyDictKeysObject *dk,
1742-
PyObject *key, Py_ssize_t hash,
1743-
PyObject **value)
1744-
{
1745-
ASSERT_DICT_LOCKED(mp);
1746-
1747-
Py_ssize_t ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash);
1748-
1749-
if (ix >= 0) {
1750-
*value = mp->ma_values->values[ix];
1751-
}
1752-
else if (ix == DKIX_KEY_CHANGED) {
1753-
ix = _Py_dict_lookup(mp, key, hash, value);
1754-
}
1755-
else {
1756-
*value = 0;
1757-
}
1758-
return ix;
1807+
UNLOCK_KEYS(keys);
1808+
*result = value;
1809+
mp->ma_version_tag = new_version;
1810+
return 0;
17591811
}
17601812

1761-
#endif
1762-
17631813
/*
17641814
Internal routine to insert a new item into the table.
17651815
Used both by the internal resize routine and by the public insert routine.
@@ -1780,75 +1830,37 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp,
17801830
assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL);
17811831
}
17821832

1783-
Py_ssize_t ix;
1784-
#ifdef Py_GIL_DISABLED
1785-
PyDictKeysObject *dk = mp->ma_keys;
1786-
DictKeysKind kind = dk->dk_kind;
1787-
1788-
if (kind == DICT_KEYS_SPLIT) {
1789-
// Split keys can be mutated by other dictionaries, so we need
1790-
// to do a threadsafe lookup here. This is basically the split-key
1791-
// half of _Py_dict_lookup_threadsafe and avoids locking the
1792-
// shared keys if we can
1793-
assert(PyUnicode_CheckExact(key));
1794-
ix = splitdict_lookup_threadsafe(mp, dk, key, hash, &old_value);
1795-
} else {
1796-
ix = _Py_dict_lookup(mp, key, hash, &old_value);
1797-
}
1798-
#else
1799-
ix = _Py_dict_lookup(mp, key, hash, &old_value);
1800-
#endif
1833+
Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value);
18011834
if (ix == DKIX_ERROR)
18021835
goto Fail;
18031836

18041837
MAINTAIN_TRACKING(mp, key, value);
18051838

18061839
if (ix == DKIX_EMPTY) {
1807-
uint64_t new_version;
18081840
if (!_PyDict_HasSplitTable(mp)) {
1809-
new_version = _PyDict_NotifyEvent(
1841+
uint64_t new_version = _PyDict_NotifyEvent(
18101842
interp, PyDict_EVENT_ADDED, mp, key, value);
18111843
/* Insert into new slot. */
18121844
mp->ma_keys->dk_version = 0;
18131845
if (insert_combined_dict(interp, mp, hash, key, value) < 0) {
18141846
goto Fail;
18151847
}
1848+
mp->ma_version_tag = new_version;
18161849
}
18171850
else {
1818-
LOCK_KEYS(mp->ma_keys);
1819-
1851+
int res = insert_split_dict(interp, mp, hash, key,
1852+
value, &ix, &old_value);
1853+
if (res < 0) {
1854+
return -1;
1855+
}
18201856
#ifdef Py_GIL_DISABLED
1821-
// We could have raced between our lookup before and the insert,
1822-
// so we need to lookup again with the keys locked
1823-
ix = splitdict_lookup_threadsafe(mp, dk, key, hash, &old_value);
1824-
if (ix >= 0) {
1825-
UNLOCK_KEYS(mp->ma_keys);
1857+
else if (res == 1) {
18261858
goto insert_on_split_race;
18271859
}
18281860
#endif
1829-
new_version = _PyDict_NotifyEvent(
1830-
interp, PyDict_EVENT_ADDED, mp, key, value);
1831-
/* Insert into new slot. */
1832-
mp->ma_keys->dk_version = 0;
1833-
if (mp->ma_keys->dk_usable <= 0) {
1834-
UNLOCK_KEYS(mp->ma_keys);
1835-
1836-
if (split_dict_resize_and_insert(interp, mp, hash, key, value) < 0) {
1837-
goto Fail;
1838-
}
1839-
} else {
1840-
int insert = insert_split_dict(interp, mp, hash, key, value);
1841-
UNLOCK_KEYS(mp->ma_keys);
1842-
1843-
if (insert < 0) {
1844-
goto Fail;
1845-
}
1846-
}
1847-
mp->ma_keys->dk_version = new_version;
18481861
}
18491862

18501863
mp->ma_used++;
1851-
mp->ma_version_tag = new_version;
18521864
ASSERT_CONSISTENT(mp);
18531865
return 0;
18541866
}
@@ -4290,12 +4302,12 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
42904302
}
42914303

42924304
if (ix == DKIX_EMPTY) {
4293-
uint64_t new_version = _PyDict_NotifyEvent(
4294-
interp, PyDict_EVENT_ADDED, mp, key, default_value);
4295-
mp->ma_keys->dk_version = 0;
42964305
value = default_value;
42974306

42984307
if (!_PyDict_HasSplitTable(mp)) {
4308+
uint64_t new_version = _PyDict_NotifyEvent(
4309+
interp, PyDict_EVENT_ADDED, mp, key, default_value);
4310+
mp->ma_keys->dk_version = 0;
42994311
if (insert_combined_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) {
43004312
Py_DECREF(key);
43014313
Py_DECREF(value);
@@ -4304,60 +4316,44 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
43044316
}
43054317
return -1;
43064318
}
4319+
mp->ma_version_tag = new_version;
43074320
}
43084321
else {
4309-
LOCK_KEYS(mp->ma_keys);
4310-
#ifdef Py_GIL_DISABLED
4311-
// We could have raced between our lookup before and the insert,
4312-
// so we need to lookup again with the keys locked
4313-
ix = _Py_dict_lookup(mp, key, hash, &value);
4314-
if (ix >= 0) {
4315-
UNLOCK_KEYS(mp->ma_keys);
4316-
if (value != NULL) {
4317-
if (result) {
4318-
*result = incref_result ? Py_NewRef(value) : value;
4319-
}
4320-
return 0;
4321-
}
4322-
goto insert_on_split_race;
4323-
}
4324-
#endif
4325-
if (mp->ma_keys->dk_usable <= 0) {
4326-
UNLOCK_KEYS(mp->ma_keys);
4327-
4328-
if (split_dict_resize_and_insert(interp, mp, hash, key, value) < 0) {
4329-
return -1;
4330-
}
4331-
} else {
4332-
int insert = insert_split_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value));
4333-
UNLOCK_KEYS(mp->ma_keys);
4334-
4335-
if (insert < 0) {
4336-
Py_DECREF(key);
4337-
Py_DECREF(value);
4322+
int res = insert_split_dict(interp, mp, hash, Py_NewRef(key),
4323+
Py_NewRef(default_value), &ix, &value);
4324+
if (res) {
4325+
Py_DECREF(key);
4326+
Py_DECREF(value);
4327+
if (res < 0) {
43384328
if (result) {
43394329
*result = NULL;
43404330
}
43414331
return -1;
43424332
}
4333+
4334+
#ifdef Py_GIL_DISABLED
4335+
// Raced with another thread inserting
4336+
goto insert_on_split_race;
4337+
#endif
43434338
}
43444339
}
43454340

43464341
MAINTAIN_TRACKING(mp, key, value);
43474342
mp->ma_used++;
4348-
mp->ma_version_tag = new_version;
43494343
assert(mp->ma_keys->dk_usable >= 0);
43504344
ASSERT_CONSISTENT(mp);
43514345
if (result) {
43524346
*result = incref_result ? Py_NewRef(value) : value;
43534347
}
43544348
return 0;
43554349
}
4356-
else if (value == NULL) {
4350+
43574351
#ifdef Py_GIL_DISABLED
43584352
insert_on_split_race:
43594353
#endif
4360-
uint64_t new_version = _PyDict_NotifyEvent(
4354+
if (value == NULL) {
4355+
uint64_t new_version;
4356+
new_version = _PyDict_NotifyEvent(
43614357
interp, PyDict_EVENT_ADDED, mp, key, default_value);
43624358
value = default_value;
43634359
assert(_PyDict_HasSplitTable(mp));

0 commit comments

Comments
 (0)
0