bpo-37596: Make `set` and `frozenset` marshalling deterministic (GH-27926) · python/cpython@33d95c6 · GitHub
[go: up one dir, main page]

Skip to content

Commit 33d95c6

Browse files
authored
bpo-37596: Make set and frozenset marshalling deterministic (GH-27926)
1 parent 7ecd342 commit 33d95c6

File tree

3 files changed

+59
-0
lines changed

3 files changed

+59
-0
lines changed

Lib/test/test_marshal.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,31 @@ def test_eof(self):
344344
for i in range(len(data)):
345345
self.assertRaises(EOFError, marshal.loads, data[0: i])
346346

347+
def test_deterministic_sets(self):
    # bpo-37596: Reproducible builds require that the elements of sets and
    # frozensets are marshalled in a consistent order, regardless of how
    # hash randomization has scrambled their iteration order.

    def output_for_seeds(code):
        # Run `code` with "-c" in a child interpreter, first with
        # PYTHONHASHSEED=0 and then with PYTHONHASHSEED=1, and collect the
        # output element of each assert_python_ok() result.
        collected = []
        for seed in ("0", "1"):
            _, out, _ = assert_python_ok("-c", code, PYTHONHASHSEED=seed)
            collected.append(out)
        return collected

    element_lists = (
        "float('nan'), b'a', b'b', b'c', 'x', 'y', 'z'",
        # The repeated 'string' member also checks for bad interactions
        # with backreferencing:
        "('string', 1), ('string', 2), ('string', 3)",
    )
    for kind in ("set", "frozenset"):
        for elements in element_lists:
            expr = f"{kind}([{elements}])"
            with self.subTest(expr):
                # Precondition: the printed form must differ between hash
                # seeds 0 and 1. If this assertion fails, the test needs to
                # be updated with different elements.
                repr_0, repr_1 = output_for_seeds(f"print({expr})")
                self.assertNotEqual(repr_0, repr_1)
                # The actual test: the marshalled bytes must be identical
                # under both seeds.
                dump_0, dump_1 = output_for_seeds(
                    f"import marshal; print(marshal.dumps({expr}))"
                )
                self.assertEqual(dump_0, dump_1)
371+
347372
LARGE_SIZE = 2**31
348373
pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4
349374

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Ensure that :class:`set` and :class:`frozenset` objects are always
2+
:mod:`marshalled <marshal>` reproducibly.

Python/marshal.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,9 +503,41 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
503503
W_TYPE(TYPE_SET, p);
504504
n = PySet_GET_SIZE(v);
505505
W_SIZE(n, p);
506+
// bpo-37596: To support reproducible builds, sets and frozensets need
507+
// to have their elements serialized in a consistent order (even when
508+
// they have been scrambled by hash randomization). To ensure this, we
509+
// use an order equivalent to sorted(v, key=marshal.dumps):
510+
PyObject *pairs = PyList_New(0);
511+
if (pairs == NULL) {
512+
p->error = WFERR_NOMEMORY;
513+
return;
514+
}
506515
while (_PySet_NextEntry(v, &pos, &value, &hash)) {
516+
PyObject *dump = PyMarshal_WriteObjectToString(value, p->version);
517+
if (dump == NULL) {
518+
p->error = WFERR_UNMARSHALLABLE;
519+
goto anyset_done;
520+
}
521+
PyObject *pair = PyTuple_Pack(2, dump, value);
522+
Py_DECREF(dump);
523+
if (pair == NULL || PyList_Append(pairs, pair)) {
524+
p->error = WFERR_NOMEMORY;
525+
Py_XDECREF(pair);
526+
goto anyset_done;
527+
}
528+
Py_DECREF(pair);
529+
}
530+
if (PyList_Sort(pairs)) {
531+
p->error = WFERR_NOMEMORY;
532+
goto anyset_done;
533+
}
534+
for (Py_ssize_t i = 0; i < n; i++) {
535+
PyObject *pair = PyList_GET_ITEM(pairs, i);
536+
value = PyTuple_GET_ITEM(pair, 1);
507537
w_object(value, p);
508538
}
539+
anyset_done:
540+
Py_DECREF(pairs);
509541
}
510542
else if (PyCode_Check(v)) {
511543
PyCodeObject *co = (PyCodeObject *)v;

0 commit comments

Comments
 (0)
0