8000 gh-112075: refactor dictionary lookup functions for better re-usability by DinoV · Pull Request #114629 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-112075: refactor dictionary lookup functions for better re-usability #114629

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 30, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 95 additions & 97 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -874,85 +874,38 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
Py_UNREACHABLE();
}

// Search non-Unicode key from Unicode table
static Py_ssize_t
unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
static inline Py_ALWAYS_INLINE Py_ssize_t
do_lookup(PyDictObject *mp, PyDictKeysObject *dk, PyObject *key, Py_hash_t hash,
Py_ssize_t (*check_lookup)(PyDictObject *, PyDictKeysObject *, void *, Py_ssize_t ix, PyObject *key, Py_hash_t))
{
PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk);
void *ep0 = _DK_ENTRIES(dk);
size_t mask = DK_MASK(dk);
size_t perturb = hash;
size_t i = (size_t)hash & mask;
Py_ssize_t ix;
for (;;) {
ix = dictkeys_get_index(dk, i);
if (ix >= 0) {
PyDictUnicodeEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key) {
Py_ssize_t cmp = check_lookup(mp, dk, ep0, ix, key, hash);
if (cmp < 0) {
return cmp;
} else if (cmp) {
return ix;
}
if (unicode_get_hash(ep->me_key) == hash) {
PyObject *startkey = ep->me_key;
Py_INCREF(startkey);
int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0) {
return DKIX_ERROR;
}
if (dk == mp->ma_keys && ep->me_key == startkey) {
if (cmp > 0) {
return ix;
}
}
else {
/* The dict was mutated, restart */
return DKIX_KEY_CHANGED;
}
}
}
else if (ix == DKIX_EMPTY) {
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
}

// Search Unicode key from Unicode table.
static Py_ssize_t _Py_HOT_FUNCTION
unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
{
PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk);
size_t mask = DK_MASK(dk);
size_t perturb = hash;
size_t i = (size_t)hash & mask;
Py_ssize_t ix;
for (;;) {
ix = dictkeys_get_index(dk, i);
if (ix >= 0) {
PyDictUnicodeEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) {
return ix;
}
}
else if (ix == DKIX_EMPTY) {
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
// Manual loop unrolling
ix = dictkeys_get_index(dk, i);
if (ix >= 0) {
PyDictUnicodeEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) {
Py_ssize_t cmp = check_lookup(mp, dk, ep0, ix, key, hash);
if (cmp < 0) {
return cmp;
} else if (cmp) {
return ix;
}
}
Expand All @@ -965,49 +918,94 @@ unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
Py_UNREACHABLE();
}

// Search key from Generic table.
static inline Py_ALWAYS_INLINE Py_ssize_t
compare_unicode_generic(PyDictObject *mp, PyDictKeysObject *dk,
void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash)
{
PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
assert(!PyUnicode_CheckExact(key));
// TODO: Thread safety

if (unicode_get_hash(ep->me_key) == hash) {
PyObject *startkey = ep->me_key;
Py_INCREF(startkey);
int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0) {
return DKIX_ERROR;
}
if (dk == mp->ma_keys && ep->me_key == startkey) {
return cmp;
}
else {
/* The dict was mutated, restart */
return DKIX_KEY_CHANGED;
}
}
return 0;
}

// Search non-Unicode key from Unicode table
static Py_ssize_t
dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
{
PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
size_t mask = DK_MASK(dk);
size_t perturb = hash;
size_t i = (size_t)hash & mask;
Py_ssize_t ix;
for (;;) {
ix = dictkeys_get_index(dk, i);
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
if (ep->me_key == key) {
return ix;
}
if (ep->me_hash == hash) {
PyObject *startkey = ep->me_key;
Py_INCREF(startkey);
int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0) {
return DKIX_ERROR;
}
if (dk == mp->ma_keys && ep->me_key == startkey) {
if (cmp > 0) {
return ix;
}
}
else {
/* The dict was mutated, restart */
return DKIX_KEY_CHANGED;
}
}
return do_lookup(mp, dk, key, hash, compare_unicode_generic);
}

static inline Py_ALWAYS_INLINE Py_ssize_t
compare_unicode_unicode(PyDictObject *mp, PyDictKeysObject *dk,
void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash)
{
PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) {
return 1;
}
return 0;
}

static Py_ssize_t _Py_HOT_FUNCTION
unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
{
return do_lookup(NULL, dk, key, hash, compare_unicode_unicode);
}

static inline Py_ALWAYS_INLINE Py_ssize_t
compare_generic(PyDictObject *mp, PyDictKeysObject *dk,
void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash)
{
PyDictKeyEntry *ep = &((PyDictKeyEntry *)ep0)[ix];
assert(ep->me_key != NULL);
if (ep->me_key == key) {
return 1;
}
if (ep->me_hash == hash) {
PyObject *startkey = ep->me_key;
Py_INCREF(startkey);
int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0) {
return DKIX_ERROR;
}
else if (ix == DKIX_EMPTY) {
return DKIX_EMPTY;
if (dk == mp->ma_keys && ep->me_key == startkey) {
return cmp;
}
else {
/* The dict was mutated, restart */
return DKIX_KEY_CHANGED;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
return 0;
}

static Py_ssize_t
dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
{
return do_lookup(mp, dk, key, hash, compare_generic);
}

/* Lookup a string in a (all unicode) dict keys.
Expand Down
0