gh-95382: Improve performance of json encoder with indent (GH-118105) · python/cpython@05adfbb · GitHub
[go: up one dir, main page]

Skip to content

Commit 05adfbb

Browse files
authored
gh-95382: Improve performance of json encoder with indent (GH-118105)
1 parent 7758be4 commit 05adfbb

File tree

3 files changed

+105
-47
lines changed

3 files changed

+105
-47
lines changed

Lib/json/encoder.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -244,15 +244,18 @@ def floatstr(o, allow_nan=self.allow_nan,
244244
return text
245245

246246

247-
if (_one_shot and c_make_encoder is not None
248-
and self.indent is None):
247+
if self.indent is None or isinstance(self.indent, str):
248+
indent = self.indent
249+
else:
250+
indent = ' ' * self.indent
251+
if _one_shot and c_make_encoder is not None:
249252
_iterencode = c_make_encoder(
250-
markers, self.default, _encoder, self.indent,
253+
markers, self.default, _encoder, indent,
251254
self.key_separator, self.item_separator, self.sort_keys,
252255
self.skipkeys, self.allow_nan)
253256
else:
254257
_iterencode = _make_iterencode(
255-
markers, self.default, _encoder, self.indent, floatstr,
258+
markers, self.default, _encoder, indent, floatstr,
256259
self.key_separator, self.item_separator, self.sort_keys,
257260
self.skipkeys, _one_shot)
258261
return _iterencode(o, 0)
@@ -272,9 +275,6 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
272275
_intstr=int.__repr__,
273276
):
274277

275-
if _indent is not None and not isinstance(_indent, str):
276-
_indent = ' ' * _indent
277-
278278
def _iterencode_list(lst, _current_indent_level):
279279
if not lst:
280280
yield '[]'
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Improve performance of :func:`json.dumps` and :func:`json.dump` when using the argument *indent*. Depending on the data, encoding with
2+
:func:`json.dumps` and *indent* can be 2 to 3 times faster.

Modules/_json.c

Lines changed: 96 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self);
8585
static int
8686
encoder_clear(PyEncoderObject *self);
8787
static int
88-
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level);
88+
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
8989
static int
90-
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level);
90+
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
9191
static int
92-
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level);
92+
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
9393
static PyObject *
9494
_encoded_const(PyObject *obj);
9595
static void
@@ -1251,6 +1251,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12511251
return (PyObject *)s;
12521252
}
12531253

1254+
static PyObject *
1255+
_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
1256+
{
1257+
PyObject *newline_indent = PyUnicode_FromOrdinal('\n');
1258+
if (newline_indent != NULL && indent_level) {
1259+
PyUnicode_AppendAndDel(&newline_indent,
1260+
PySequence_Repeat(indent, indent_level));
1261+
}
1262+
return newline_indent;
1263+
}
1264+
12541265
static PyObject *
12551266
encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
12561267
{
@@ -1267,10 +1278,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
12671278
_PyUnicodeWriter_Init(&writer);
12681279
writer.overallocate = 1;
12691280

1270-
if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
1281+
PyObject *newline_indent = NULL;
1282+
if (self->indent != Py_None) {
1283+
newline_indent = _create_newline_indent(self->indent, indent_level);
1284+
if (newline_indent == NULL) {
1285+
_PyUnicodeWriter_Dealloc(&writer);
1286+
return NULL;
1287+
}
1288+
}
1289+
if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
12711290
_PyUnicodeWriter_Dealloc(&writer);
1291+
Py_XDECREF(newline_indent);
12721292
return NULL;
12731293
}
1294+
Py_XDECREF(newline_indent);
12741295

12751296
result = PyTuple_New(1);
12761297
if (result == NULL ||
@@ -1358,7 +1379,7 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
13581379

13591380
static int
13601381
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
1361-
PyObject *obj, Py_ssize_t indent_level)
1382+
PyObject *obj, PyObject *newline_indent)
13621383
{
13631384
/* Encode Python object obj to a JSON term */
13641385
PyObject *newobj;
@@ -1394,14 +1415,14 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
13941415
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
13951416
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
13961417
return -1;
1397-
rv = encoder_listencode_list(s, writer, obj, indent_level);
1418+
rv = encoder_listencode_list(s, writer, obj, newline_indent);
13981419
_Py_LeaveRecursiveCall();
13991420
return rv;
14001421
}
14011422
else if (PyDict_Check(obj)) {
14021423
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
14031424
return -1;
1404-
rv = encoder_listencode_dict(s, writer, obj, indent_level);
1425+
rv = encoder_listencode_dict(s, writer, obj, newline_indent);
14051426
_Py_LeaveRecursiveCall();
14061427
return rv;
14071428
}
@@ -1435,7 +1456,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
14351456
Py_XDECREF(ident);
14361457
return -1;
14371458
}
1438-
rv = encoder_listencode_obj(s, writer, newobj, indent_level);
1459+
rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
14391460
_Py_LeaveRecursiveCall();
14401461

14411462
Py_DECREF(newobj);
@@ -1456,7 +1477,9 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
14561477

14571478
static int
14581479
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
1459-
PyObject *key, PyObject *value, Py_ssize_t indent_level)
1480+
PyObject *key, PyObject *value,
1481+
PyObject *newline_indent,
1482+
PyObject *item_separator)
14601483
{
14611484
PyObject *keystr = NULL;
14621485
PyObject *encoded;
@@ -1493,7 +1516,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
14931516
*first = false;
14941517
}
14951518
else {
1496-
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
1519+
if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
14971520
Py_DECREF(keystr);
14981521
return -1;
14991522
}
@@ -1511,21 +1534,23 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
15111534
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
15121535
return -1;
15131536
}
1514-
if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
1537+
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
15151538
return -1;
15161539
}
15171540
return 0;
15181541
}
15191542

15201543
static int
15211544
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
1522-
PyObject *dct, Py_ssize_t indent_level)
1545+
PyObject *dct, PyObject *newline_indent)
15231546
{
15241547
/* Encode Python dict dct a JSON term */
15251548
PyObject *ident = NULL;
15261549
PyObject *items = NULL;
15271550
PyObject *key, *value;
15281551
bool first = true;
1552+
PyObject *new_newline_indent = NULL;
1553+
PyObject *separator_indent = NULL;
15291554

15301555
if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
15311556
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
@@ -1549,14 +1574,21 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
15491574
if (_PyUnicodeWriter_WriteChar(writer, '{'))
15501575
goto bail;
15511576

1577+
PyObject *current_item_separator = s->item_separator; // borrowed reference
15521578
if (s->indent != Py_None) {
1553-
/* TODO: DOES NOT RUN */
1554-
indent_level += 1;
1555-
/*
1556-
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1557-
separator = _item_separator + newline_indent
1558-
buf += newline_indent
1559-
*/
1579+
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1580+
if (new_newline_indent == NULL) {
1581+
goto bail;
1582+
}
1583+
separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
1584+
if (separator_indent == NULL) {
1585+
goto bail;
1586+
}
1587+
// update item separator with a borrowed reference
1588+
current_item_separator = separator_indent;
1589+
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1590+
goto bail;
1591+
}
15601592
}
15611593

15621594
if (s->sort_keys || !PyDict_CheckExact(dct)) {
@@ -1574,15 +1606,19 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
15741606

15751607
key = PyTuple_GET_ITEM(item, 0);
15761608
value = PyTuple_GET_ITEM(item, 1);
1577-
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
1609+
if (encoder_encode_key_value(s, writer, &first, key, value,
1610+
new_newline_indent,
1611+
current_item_separator) < 0)
15781612
goto bail;
15791613
}
15801614
Py_CLEAR(items);
15811615

15821616
} else {
15831617
Py_ssize_t pos = 0;
15841618
while (PyDict_Next(dct, &pos, &key, &value)) {
1585-
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
1619+
if (encoder_encode_key_value(s, writer, &first, key, value,
1620+
new_newline_indent,
1621+
current_item_separator) < 0)
15861622
goto bail;
15871623
}
15881624
}
@@ -1592,29 +1628,36 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
15921628
goto bail;
15931629
Py_CLEAR(ident);
15941630
}
1595-
/* TODO DOES NOT RUN; dead code
15961631
if (s->indent != Py_None) {
1597-
indent_level -= 1;
1632+
Py_CLEAR(new_newline_indent);
1633+
Py_CLEAR(separator_indent);
1634+
1635+
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1636+
goto bail;
1637+
}
1638+
}
15981639

1599-
yield '\n' + (' ' * (_indent * _current_indent_level))
1600-
}*/
16011640
if (_PyUnicodeWriter_WriteChar(writer, '}'))
16021641
goto bail;
16031642
return 0;
16041643

16051644
bail:
16061645
Py_XDECREF(items);
16071646
Py_XDECREF(ident);
1647+
Py_XDECREF(separator_indent);
1648+
Py_XDECREF(new_newline_indent);
16081649
return -1;
16091650
}
16101651

16111652
static int
16121653
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
1613-
PyObject *seq, Py_ssize_t indent_level)
1654+
PyObject *seq, PyObject *newline_indent)
16141655
{
16151656
PyObject *ident = NULL;
16161657
PyObject *s_fast = NULL;
16171658
Py_ssize_t i;
1659+
PyObject *new_newline_indent = NULL;
1660+
PyObject *separator_indent = NULL;
16181661

16191662
ident = NULL;
16201663
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
@@ -1643,22 +1686,31 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
16431686

16441687
if (_PyUnicodeWriter_WriteChar(writer, '['))
16451688
goto bail;
1689+
1690+
PyObject *separator = s->item_separator; // borrowed reference
16461691
if (s->indent != Py_None) {
1647-
/* TODO: DOES NOT RUN */
1648-
indent_level += 1;
1649-
/*
1650-
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1651-
separator = _item_separator + newline_indent
1652-
buf += newline_indent
1653-
*/
1692+
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1693+
if (new_newline_indent == NULL) {
1694+
goto bail;
1695+
}
1696+
1697+
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1698+
goto bail;
1699+
}
1700+
1701+
separator_indent = PyUnicode_Concat(separator, new_newline_indent);
1702+
if (separator_indent == NULL) {
1703+
goto bail;
1704+
}
1705+
separator = separator_indent; // assign separator with borrowed reference
16541706
}
16551707
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
16561708
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
16571709
if (i) {
1658-
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
1710+
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
16591711
goto bail;
16601712
}
1661-
if (encoder_listencode_obj(s, writer, obj, indent_level))
1713+
if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
16621714
goto bail;
16631715
}
16641716
if (ident != NULL) {
@@ -1667,12 +1719,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
16671719
Py_CLEAR(ident);
16681720
}
16691721

1670-
/* TODO: DOES NOT RUN
16711722
if (s->indent != Py_None) {
1672-
indent_level -= 1;
1723+
Py_CLEAR(new_newline_indent);
1724+
Py_CLEAR(separator_indent);
1725+
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1726+
goto bail;
1727+
}
1728+
}
16731729

1674-
yield '\n' + (' ' * (_indent * _current_indent_level))
1675-
}*/
16761730
if (_PyUnicodeWriter_WriteChar(writer, ']'))
16771731
goto bail;
16781732
Py_DECREF(s_fast);
@@ -1681,6 +1735,8 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
16811735
bail:
16821736
Py_XDECREF(ident);
16831737
Py_DECREF(s_fast);
1738+
Py_XDECREF(separator_indent);
1739+
Py_XDECREF(new_newline_indent);
16841740
return -1;
16851741
}
16861742

@@ -1721,7 +1777,7 @@ encoder_clear(PyEncoderObject *self)
17211777
return 0;
17221778
}
17231779

1724-
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1780+
PyDoc_STRVAR(encoder_doc, "Encoder(markers, default, encoder, indent, key_separator, item_separator, sort_keys, skipkeys, allow_nan)");
17251781

17261782
static PyType_Slot PyEncoderType_slots[] = {
17271783
{Py_tp_doc, (void *)encoder_doc},

0 commit comments

Comments
 (0)
0