bpo-24275: Don't downgrade unicode-only dicts to mixed on lookups (GH-25186) · python/cpython@8557edb · GitHub
[go: up one dir, main page]

Skip to content

Commit 8557edb

Browse files
authored
bpo-24275: Don't downgrade unicode-only dicts to mixed on lookups (GH-25186)
1 parent 69a733b commit 8557edb

File tree

2 files changed

+106
-3
lines changed

2 files changed

+106
-3
lines changed

Lib/test/test_dict.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1471,6 +1471,106 @@ def test_dict_items_result_gc(self):
14711471
gc.collect()
14721472
self.assertTrue(gc.is_tracked(next(it)))
14731473

1474+
def test_str_nonstr(self):
1475+
# cpython uses a different lookup function if the dict only contains
1476+
# `str` keys. Make sure the unoptimized path is used when a non-`str`
1477+
# key appears.
1478+
1479+
class StrSub(str):
1480+
pass
1481+
1482+
eq_count = 0
1483+
# This class compares equal to the string 'key3'
1484+
class Key3:
1485+
def __hash__(self):
1486+
return hash('key3')
1487+
1488+
def __eq__(self, other):
1489+
nonlocal eq_count
1490+
if isinstance(other, Key3) or isinstance(other, str) and other == 'key3':
1491+
eq_count += 1
1492+
return True
1493+
return False
1494+
1495+
key3_1 = StrSub('key3')
1496+
key3_2 = Key3()
1497+
key3_3 = Key3()
1498+
1499+
dicts = []
1500+
1501+
# Create dicts of the form `{'key1': 42, 'key2': 43, key3: 44}` in a
1502+
# bunch of different ways. In all cases, `key3` is not of type `str`.
1503+
# `key3_1` is a `str` subclass and `key3_2` is a completely unrelated
1504+
# type.
1505+
for key3 in (key3_1, key3_2):
1506+
# A literal
1507+
dicts.append({'key1': 42, 'key2': 43, key3: 44})
1508+
1509+
# key3 inserted via `dict.__setitem__`
1510+
d = {'key1': 42, 'key2': 43}
1511+
d[key3] = 44
1512+
dicts.append(d)
1513+
1514+
# key3 inserted via `dict.setdefault`
1515+
d = {'key1': 42, 'key2': 43}
1516+
self.assertEqual(d.setdefault(key3, 44), 44)
1517+
dicts.append(d)
1518+
1519+
# key3 inserted via `dict.update`
1520+
d = {'key1': 42, 'key2': 43}
1521+
d.update({key3: 44})
1522+
dicts.append(d)
1523+
1524+
# key3 inserted via `dict.__ior__`
1525+
d = {'key1': 42, 'key2': 43}
1526+
d |= {key3: 44}
1527+
dicts.append(d)
1528+
1529+
# `dict(iterable)`
1530+
def make_pairs():
1531+
yield ('key1', 42)
1532+
yield ('key2', 43)
1533+
yield (key3, 44)
1534+
d = dict(make_pairs())
1535+
dicts.append(d)
1536+
1537+
# `dict.copy`
1538+
d = d.copy()
1539+
dicts.append(d)
1540+
1541+
# dict comprehension
1542+
d = {key: 42 + i for i,key in enumerate(['key1', 'key2', key3])}
1543+
dicts.append(d)
1544+
1545+
for d in dicts:
1546+
with self.subTest(d=d):
1547+
self.assertEqual(d.get('key1'), 42)
1548+
1549+
# Try to make an object that is of type `str` and is equal to
1550+
# `'key1'`, but (at least on cpython) is a different object.
1551+
noninterned_key1 = 'ke'
1552+
noninterned_key1 += 'y1'
1553+
if support.check_impl_detail(cpython=True):
1554+
# suppress a SyntaxWarning
1555+
interned_key1 = 'key1'
1556+
self.assertFalse(noninterned_key1 is interned_key1)
1557+
self.assertEqual(d.get(noninterned_key1), 42)
1558+
1559+
self.assertEqual(d.get('key3'), 44)
1560+
self.assertEqual(d.get(key3_1), 44)
1561+
self.assertEqual(d.get(key3_2), 44)
1562+
1563+
# `key3_3` itself is definitely not a dict key, so make sure
1564+
# that `__eq__` gets called.
1565+
#
1566+
# Note that this might not hold for `key3_1` and `key3_2`
1567+
# because they might be the same object as one of the dict keys,
1568+
# in which case implementations are allowed to skip the call to
1569+
# `__eq__`.
1570+
eq_count = 0
1571+
self.assertEqual(d.get(key3_3), 44)
1572+
self.assertGreaterEqual(eq_count, 1)
1573+
14741574

14751575
class CAPITest(unittest.TestCase):
14761576

Objects/dictobject.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -857,7 +857,6 @@ lookdict_unicode(PyDictObject *mp, PyObject *key,
857857
unicodes is to override __eq__, and for speed we don't cater to
858858
that here. */
859859
if (!PyUnicode_CheckExact(key)) {
860-
mp->ma_keys->dk_lookup = lookdict;
861860
return lookdict(mp, key, hash, value_addr);
862861
}
863862

@@ -900,7 +899,6 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
900899
unicodes is to override __eq__, and for speed we don't cater to
901900
that here. */
902901
if (!PyUnicode_CheckExact(key)) {
903-
mp->ma_keys->dk_lookup = lookdict;
904902
return lookdict(mp, key, hash, value_addr);
905903
}
906904

@@ -1084,7 +1082,6 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
10841082
if (ix == DKIX_ERROR)
10851083
goto Fail;
10861084

1087-
assert(PyUnicode_CheckExact(key) || mp->ma_keys->dk_lookup == lookdict);
10881085
MAINTAIN_TRACKING(mp, key, value);
10891086

10901087
/* When insertion order is different from shared key, we can't share
@@ -1106,6 +1103,9 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
11061103
if (insertion_resize(mp) < 0)
11071104
goto Fail;
11081105
}
1106+
if (!PyUnicode_CheckExact(key) && mp->ma_keys->dk_lookup != lookdict) {
1107+
mp->ma_keys->dk_lookup = lookdict;
1108+
}
11091109
Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
11101110
ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries];
11111111
dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
@@ -3068,6 +3068,9 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
30683068
return NULL;
30693069
}
30703070
}
3071+
if (!PyUnicode_CheckExact(key) && mp->ma_keys->dk_lookup != lookdict) {
3072+
mp->ma_keys->dk_lookup = lookdict;
3073+
}
30713074
Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
30723075
ep0 = DK_ENTRIES(mp->ma_keys);
30733076
ep = &ep0[mp->ma_keys->dk_nentries];

0 commit comments

Comments
 (0)
0