Use RecursionError to check for circular objects · python/cpython@db37d63 · GitHub
[go: up one dir, main page]

Skip to content

Commit db37d63

Browse files
aivarsk authored and gpshead committed
Use RecursionError to check for circular objects
Other fast JSON encoders rely on RecursionError to detect circular references:

- ultrajson -- OverflowError: Maximum recursion level reached
- orjson -- TypeError: Recursion limit reached

Python's json module kept a dictionary with visited objects. It is too much work for a nicer error message. Instead raise a ValueError on RecursionError to keep the API but do not track the objects.
1 parent 98a5b83 commit db37d63

File tree

3 files changed

+24
-115
lines changed

3 files changed

+24
-115
lines changed

Lib/json/encoder.py

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -194,12 +194,23 @@ def encode(self, o):
194194
return encode_basestring_ascii(o)
195195
else:
196196
return encode_basestring(o)
197-
# This doesn't pass the iterator directly to ''.join() because the
198-
# exceptions aren't as detailed. The list call should be roughly
199-
# equivalent to the PySequence_Fast that ''.join() would do.
200-
chunks = self.iterencode(o, _one_shot=True)
201-
if not isinstance(chunks, (list, tuple)):
202-
chunks = list(chunks)
197+
198+
# There are tests for bad bool args
199+
bool(self.check_circular)
200+
try:
201+
# This doesn't pass the iterator directly to ''.join() because the
202+
# exceptions aren't as detailed. The list call should be roughly
203+
# equivalent to the PySequence_Fast that ''.join() would do.
204+
chunks = self.iterencode(o, _one_shot=True)
205+
if not isinstance(chunks, (list, tuple)):
206+
chunks = list(chunks)
207+
except RecursionError as exc:
208+
if self.check_circular:
209+
err = ValueError("Circular reference detected")
210+
if notes := getattr(exc, "__notes__", None):
211+
err.__notes__ = notes
212+
raise err
213+
raise
203214
return ''.join(chunks)
204215

205216
def iterencode(self, o, _one_shot=False):
@@ -212,10 +223,7 @@ def iterencode(self, o, _one_shot=False):
212223
mysocket.write(chunk)
213224
214225
"""
215-
if self.check_circular:
216-
markers = {}
217-
else:
218-
markers = None
226+
markers = None
219227
if self.ensure_ascii:
220228
_encoder = encode_basestring_ascii
221229
else:
@@ -279,11 +287,6 @@ def _iterencode_list(lst, _current_indent_level):
279287
if not lst:
280288
yield '[]'
281289
return
282-
if markers is not None:
283-
markerid = id(lst)
284-
if markerid in markers:
285-
raise ValueError("Circular reference detected")
286-
markers[markerid] = lst
287290
buf = '['
288291
if _indent is not None:
289292
_current_indent_level += 1
@@ -331,18 +334,11 @@ def _iterencode_list(lst, _current_indent_level):
331334
_current_indent_level -= 1
332335
yield '\n' + _indent * _current_indent_level
333336
yield ']'
334-
if markers is not None:
335-
del markers[markerid]
336337

337338
def _iterencode_dict(dct, _current_indent_level):
338339
if not dct:
339340
yield '{}'
340341
return
341-
if markers is not None:
342-
markerid = id(dct)
343-
if markerid in markers:
344-
raise ValueError("Circular reference detected")
345-
markers[markerid] = dct
346342
yield '{'
347343
if _indent is not None:
348344
_current_indent_level += 1
@@ -417,8 +413,6 @@ def _iterencode_dict(dct, _current_indent_level):
417413
_current_indent_level -= 1
418414
yield '\n' + _indent * _current_indent_level
419415
yield '}'
420-
if markers is not None:
421-
del markers[markerid]
422416

423417
def _iterencode(o, _current_indent_level):
424418
if isinstance(o, str):
@@ -440,11 +434,6 @@ def _iterencode(o, _current_indent_level):
440434
elif isinstance(o, dict):
441435
yield from _iterencode_dict(o, _current_indent_level)
442436
else:
443-
if markers is not None:
444-
markerid = id(o)
445-
if markerid in markers:
446-
raise ValueError("Circular reference detected")
447-
markers[markerid] = o
448437
newobj = _default(o)
449438
try:
450439
yield from _iterencode(newobj, _current_indent_level)
@@ -453,6 +442,4 @@ def _iterencode(o, _current_indent_level):
453442
except BaseException as exc:
454443
exc.add_note(f'when serializing {type(o).__name__} object')
455444
raise
456-
if markers is not None:
457-
del markers[markerid]
458445
return _iterencode

Lib/test/test_json/test_recursion.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def test_listrecursion(self):
1313
try:
1414
self.dumps(x)
1515
except ValueError as exc:
16-
self.assertEqual(exc.__notes__, ["when serializing list item 0"])
16+
self.assertEqual(exc.__notes__[:1], ["when serializing list item 0"])
1717
else:
1818
self.fail("didn't raise ValueError on list recursion")
1919
x = []
@@ -22,7 +22,7 @@ def test_listrecursion(self):
2222
try:
2323
self.dumps(x)
2424
except ValueError as exc:
25-
self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2)
25+
self.assertEqual(exc.__notes__[:2], ["when serializing list item 0"]*2)
2626
else:
2727
self.fail("didn't raise ValueError on alternating list recursion")
2828
y = []
@@ -36,7 +36,7 @@ def test_dictrecursion(self):
3636
try:
3737
self.dumps(x)
3838
except ValueError as exc:
39-
self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"])
39+
self.assertEqual(exc.__notes__[:1], ["when serializing dict item 'test'"])
4040
else:
4141
self.fail("didn't raise ValueError on dict recursion")
4242
x = {}
@@ -61,7 +61,7 @@ def default(self, o):
6161
try:
6262
enc.encode(JSONTestObject)
6363
except ValueError as exc:
64-
self.assertEqual(exc.__notes__,
64+
self.assertEqual(exc.__notes__[:2],
6565
["when serializing list item 0",
6666
"when serializing type object"])
6767
else:
@@ -94,10 +94,10 @@ def test_highly_nested_objects_encoding(self):
9494
l, d = [l], {'k':d}
9595
with self.assertRaises(RecursionError):
9696
with support.infinite_recursion(5000):
97-
self.dumps(l)
97+
self.dumps(l, check_circular=False)
9898
with self.assertRaises(RecursionError):
9999
with support.infinite_recursion(5000):
100-
self.dumps(d)
100+
self.dumps(d, check_circular=False)
101101

102102
@support.skip_emscripten_stack_overflow()
103103
@support.skip_wasi_stack_overflow()

Modules/_json.c

Lines changed: 0 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,7 +1242,6 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12421242
if (s == NULL)
12431243
return NULL;
12441244

1245-
s->markers = Py_NewRef(markers);
12461245
s->defaultfn = Py_NewRef(defaultfn);
12471246
s->encoder = Py_NewRef(encoder);
12481247
s->indent = Py_NewRef(indent);
@@ -1521,33 +1520,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
15211520
return rv;
15221521
}
15231522
else {
1524-
PyObject *ident = NULL;
1525-
if (s->markers != Py_None) {
1526-
int has_key;
1527-
ident = PyLong_FromVoidPtr(obj);
1528-
if (ident == NULL)
1529-
return -1;
1530-
has_key = PyDict_Contains(s->markers, ident);
1531-
if (has_key) {
1532-
if (has_key != -1)
1533-
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1534-
Py_DECREF(ident);
1535-
return -1;
1536-
}
1537-
if (PyDict_SetItem(s->markers, ident, obj)) {
1538-
Py_DECREF(ident);
1539-
return -1;
1540-
}
1541-
}
15421523
newobj = PyObject_CallOneArg(s->defaultfn, obj);
15431524
if (newobj == NULL) {
1544-
Py_XDECREF(ident);
15451525
return -1;
15461526
}
15471527

15481528
if (_Py_EnterRecursiveCall(" while encoding a JSON object")) {
15491529
Py_DECREF(newobj);
1550-
Py_XDECREF(ident);
15511530
return -1;
15521531
}
15531532
rv = encoder_listencode_obj(s, writer, newobj, indent_level, indent_cache);
@@ -1556,16 +1535,8 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
15561535
Py_DECREF(newobj);
15571536
if (rv) {
15581537
_PyErr_FormatNote("when serializing %T object", obj);
1559-
Py_XDECREF(ident);
15601538
return -1;
15611539
}
1562-
if (ident != NULL) {
1563-
if (PyDict_DelItem(s->markers, ident)) {
1564-
Py_XDECREF(ident);
1565-
return -1;
1566-
}
1567-
Py_XDECREF(ident);
1568-
}
15691540
return rv;
15701541
}
15711542
}
@@ -1642,7 +1613,6 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
16421613
Py_ssize_t indent_level, PyObject *indent_cache)
16431614
{
16441615
/* Encode Python dict dct a JSON term */
1645-
PyObject *ident = NULL;
16461616
PyObject *items = NULL;
16471617
PyObject *key, *value;
16481618
bool first = true;
@@ -1652,22 +1622,6 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
16521622
return PyUnicodeWriter_WriteASCII(writer, "{}", 2);
16531623
}
16541624

1655-
if (s->markers != Py_None) {
1656-
int has_key;
1657-
ident = PyLong_FromVoidPtr(dct);
1658-
if (ident == NULL)
1659-
goto bail;
1660-
has_key = PyDict_Contains(s->markers, ident);
1661-
if (has_key) {
1662-
if (has_key != -1)
1663-
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1664-
goto bail;
1665-
}
1666-
if (PyDict_SetItem(s->markers, ident, dct)) {
1667-
goto bail;
1668-
}
1669-
}
1670-
16711625
if (PyUnicodeWriter_WriteChar(writer, '{')) {
16721626
goto bail;
16731627
}
@@ -1715,11 +1669,6 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
17151669
}
17161670
}
17171671

1718-
if (ident != NULL) {
1719-
if (PyDict_DelItem(s->markers, ident))
1720-
goto bail;
1721-
Py_CLEAR(ident);
1722-
}
17231672
if (s->indent != Py_None) {
17241673
indent_level--;
17251674
if (write_newline_indent(writer, indent_level, indent_cache) < 0) {
@@ -1734,7 +1683,6 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
17341683

17351684
bail:
17361685
Py_XDECREF(items);
1737-
Py_XDECREF(ident);
17381686
return -1;
17391687
}
17401688

@@ -1743,11 +1691,9 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
17431691
PyObject *seq,
17441692
Py_ssize_t indent_level, PyObject *indent_cache)
17451693
{
1746-
PyObject *ident = NULL;
17471694
PyObject *s_fast = NULL;
17481695
Py_ssize_t i;
17491696

1750-
ident = NULL;
17511697
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
17521698
if (s_fast == NULL)
17531699
return -1;
@@ -1756,22 +1702,6 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
17561702
return PyUnicodeWriter_WriteASCII(writer, "[]", 2);
17571703
}
17581704

1759-
if (s->markers != Py_None) {
1760-
int has_key;
1761-
ident = PyLong_FromVoidPtr(seq);
1762-
if (ident == NULL)
1763-
goto bail;
1764-
has_key = PyDict_Contains(s->markers, ident);
1765-
if (has_key) {
1766-
if (has_key != -1)
1767-
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1768-
goto bail;
1769-
}
1770-
if (PyDict_SetItem(s->markers, ident, seq)) {
1771-
goto bail;
1772-
}
1773-
}
1774-
17751705
if (PyUnicodeWriter_WriteChar(writer, '[')) {
17761706
goto bail;
17771707
}
@@ -1797,11 +1727,6 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
17971727
goto bail;
17981728
}
17991729
}
1800-
if (ident != NULL) {
1801-
if (PyDict_DelItem(s->markers, ident))
1802-
goto bail;
1803-
Py_CLEAR(ident);
1804-
}
18051730

18061731
if (s->indent != Py_None) {
18071732
indent_level--;
@@ -1817,7 +1742,6 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
18171742
return 0;
18181743

18191744
bail:
1820-
Py_XDECREF(ident);
18211745
Py_DECREF(s_fast);
18221746
return -1;
18231747
}
@@ -1838,7 +1762,6 @@ encoder_traverse(PyObject *op, visitproc visit, void *arg)
18381762
{
18391763
PyEncoderObject *self = PyEncoderObject_CAST(op);
18401764
Py_VISIT(Py_TYPE(self));
1841-
Py_VISIT(self->markers);
18421765
Py_VISIT(self->defaultfn);
18431766
Py_VISIT(self->encoder);
18441767
Py_VISIT(self->indent);
@@ -1852,7 +1775,6 @@ encoder_clear(PyObject *op)
18521775
{
18531776
PyEncoderObject *self = PyEncoderObject_CAST(op);
18541777
/* Deallocate Encoder */
1855-
Py_CLEAR(self->markers);
18561778
Py_CLEAR(self->defaultfn);
18571779
Py_CLEAR(self->encoder);
18581780
Py_CLEAR(self->indent);

0 commit comments

Comments
 (0)
0