From f8efe27834469c7aa8e29a2deb61bf7e75e6e383 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Filipe=20La=C3=ADns?=
Date: Sat, 14 Aug 2021 16:47:15 +0100
Subject: [PATCH] bpo-37596: compile to reproducible frozen sets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a private frozenset subtype, _PyReproducibleFrozenSet_Type, together
with a constructor _PyReproducibleFrozenSet_New() and the matching
_PyReproducibleFrozenSet_Check()/_CheckExact() macros.

For exact instances of the new type, set_add_entry() starts probing at
slot 0 with a zero perturbation instead of deriving both from the key's
hash.

The compiler now builds frozenset constants through
_PyReproducibleFrozenSet_New() (merge_consts_recursive() and
starunpack_helper()), and marshal writes exact instances of the new type
with the TYPE_FROZENSET tag.

Signed-off-by: Filipe Laíns
---
NOTE(review): only the insertion path in set_add_entry() is changed by
this patch; the bodies of set_lookkey() and set_table_resize() are not
part of the diff.  Please confirm that membership tests and resizes still
locate entries that were placed starting from slot 0.

NOTE(review): no PyType_Ready() call for _PyReproducibleFrozenSet_Type is
visible in this patch -- confirm the static type is initialised before
its first use.

 Include/setobject.h |  9 +++++++
 Objects/setobject.c | 63 +++++++++++++++++++++++++++++++++++++++++++--
 Python/compile.c    |  4 +--
 Python/marshal.c    |  5 ++--
 4 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/Include/setobject.h b/Include/setobject.h
index 62516be5ab29be..e69e1cf0c7e2d1 100644
--- a/Include/setobject.h
+++ b/Include/setobject.h
@@ -67,10 +67,19 @@ typedef struct {
 #define PySet_GET_SIZE(so) (assert(PyAnySet_Check(so)),(((PySetObject *)(so))->used))
 
 PyAPI_DATA(PyObject *) _PySet_Dummy;
+PyAPI_DATA(PyTypeObject) _PyReproducibleFrozenSet_Type;
+
+PyAPI_FUNC(PyObject *) _PyReproducibleFrozenSet_New(PyObject *);
 
 PyAPI_FUNC(int) _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash);
 PyAPI_FUNC(int) _PySet_Update(PyObject *set, PyObject *iterable);
 
+#define _PyReproducibleFrozenSet_CheckExact(ob) \
+    Py_IS_TYPE(ob, &_PyReproducibleFrozenSet_Type)
+#define _PyReproducibleFrozenSet_Check(ob) \
+    (Py_IS_TYPE(ob, &_PyReproducibleFrozenSet_Type) || \
+      PyType_IsSubtype(Py_TYPE(ob), &_PyReproducibleFrozenSet_Type))
+
 #endif /* Section excluded by Py_LIMITED_API */
 
 PyAPI_DATA(PyTypeObject) PySet_Type;
diff --git a/Objects/setobject.c b/Objects/setobject.c
index caff85c9e38939..ad9c046fa02efa 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -118,9 +118,15 @@ set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
   restart:
 
     mask = so->mask;
-    i = (size_t)hash & mask;
     freeslot = NULL;
-    perturb = hash;
+
+    if (_PyReproducibleFrozenSet_CheckExact(so)) {
+        i = 0;
+        perturb = 0;
+    } else {
+        i = (size_t)hash & mask;
+        perturb = hash;
+    }
 
     while (1) {
         entry = &so->table[i];
@@ -2233,6 +2239,53 @@ PyTypeObject PyFrozenSet_Type = {
 };
 
 
+PyTypeObject _PyReproducibleFrozenSet_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "reproducible_frozenset",           /* tp_name */
+    sizeof(PySetObject),                /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods */
+    (destructor)set_dealloc,            /* tp_dealloc */
+    0,                                  /* tp_vectorcall_offset */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_as_async */
+    (reprfunc)set_repr,                 /* tp_repr */
+    &frozenset_as_number,               /* tp_as_number */
+    &set_as_sequence,                   /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    frozenset_hash,                     /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+        Py_TPFLAGS_BASETYPE |
+        _Py_TPFLAGS_MATCH_SELF,         /* tp_flags */
+    frozenset_doc,                      /* tp_doc */
+    (traverseproc)set_traverse,         /* tp_traverse */
+    (inquiry)set_clear_internal,        /* tp_clear */
+    (richcmpfunc)set_richcompare,       /* tp_richcompare */
+    offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
+    (getiterfunc)set_iter,              /* tp_iter */
+    0,                                  /* tp_iternext */
+    0,                                  /* tp_methods */
+    0,                                  /* tp_members */
+    0,                                  /* tp_getset */
+    &PyFrozenSet_Type,                  /* tp_base */
+    0,                                  /* tp_dict */
+    0,                                  /* tp_descr_get */
+    0,                                  /* tp_descr_set */
+    0,                                  /* tp_dictoffset */
+    0,                                  /* tp_init */
+    PyType_GenericAlloc,                /* tp_alloc */
+    frozenset_new,                      /* tp_new */
+    PyObject_GC_Del,                    /* tp_free */
+    .tp_vectorcall = frozenset_vectorcall,
+};
+
+
 /***** C API functions *************************************************/
 
 PyObject *
@@ -2247,6 +2300,12 @@ PyFrozenSet_New(PyObject *iterable)
     return make_new_set(&PyFrozenSet_Type, iterable);
 }
 
+PyObject *
+_PyReproducibleFrozenSet_New(PyObject *iterable)
+{
+    return make_new_set(&_PyReproducibleFrozenSet_Type, iterable);
+}
+
 Py_ssize_t
 PySet_Size(PyObject *anyset)
 {
diff --git a/Python/compile.c b/Python/compile.c
index e651ca535191af..dd2271c92d5a50 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -1443,7 +1443,7 @@ merge_consts_recursive(struct compiler *c, PyObject *o)
 
         // Instead of rewriting o, we create new frozenset and embed in the
         // key tuple.  Caller should get merged frozenset from the key tuple.
-        PyObject *new = PyFrozenSet_New(tuple);
+        PyObject *new = _PyReproducibleFrozenSet_New(tuple);
         Py_DECREF(tuple);
         if (new == NULL) {
             Py_DECREF(key);
@@ -3910,7 +3910,7 @@ starunpack_helper(struct compiler *c, asdl_expr_seq *elts, int pushed,
             ADDOP_LOAD_CONST_NEW(c, folded);
         } else {
             if (add == SET_ADD) {
-                Py_SETREF(folded, PyFrozenSet_New(folded));
+                Py_SETREF(folded, _PyReproducibleFrozenSet_New(folded));
                 if (folded == NULL) {
                     return 0;
                 }
diff --git a/Python/marshal.c b/Python/marshal.c
index 1260704c74c0be..556d15037d3583 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -492,12 +492,13 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
         }
         w_object((PyObject *)NULL, p);
     }
-    else if (PyAnySet_CheckExact(v)) {
+    else if (PyAnySet_CheckExact(v) || _PyReproducibleFrozenSet_CheckExact(v)) {
         PyObject *value;
         Py_ssize_t pos = 0;
         Py_hash_t hash;
 
-        if (PyFrozenSet_CheckExact(v))
+        if (PyFrozenSet_CheckExact(v) ||
+            _PyReproducibleFrozenSet_CheckExact(v))
             W_TYPE(TYPE_FROZENSET, p);
         else
             W_TYPE(TYPE_SET, p);