8000 gh-81283: compiler: remove indent from docstring (#106411) · python/cpython@2566b74 · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 2566b74

Browse files
methane and merwok authored
gh-81283: compiler: remove indent from docstring (#106411)
Co-authored-by: Éric <merwok@netwok.org>
1 parent bbf6297 commit 2566b74

File tree

9 files changed

+246
-30
lines changed

9 files changed

+246
-30
lines changed

Doc/whatsnew/3.13.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@ Other Language Changes
7979
* Allow the *count* argument of :meth:`str.replace` to be a keyword.
8080
(Contributed by Hugo van Kemenade in :gh:`106487`.)
8181

82+
* The compiler now strips indents from docstrings.
83+
This reduces the size of the :term:`bytecode cache <bytecode>` (e.g. ``.pyc`` files).
84+
For example, cache file size for ``sqlalchemy.orm.session`` in SQLAlchemy 2.0
85+
is reduced by about 5%.
86+
This change will affect tools using docstrings, like :mod:`doctest`.
87+
(Contributed by Inada Naoki in :gh:`81283`.)
88+
8289
New Modules
8390
===========
8491

Include/internal/pycore_compile.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);
9191

9292
/* Access compiler internals for unit testing */
9393

94+
PyAPI_FUNC(PyObject*) _PyCompile_CleanDoc(PyObject *doc);
95+
9496
PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
9597
PyObject *ast,
9698
PyObject *filename,

Lib/inspect.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -881,29 +881,28 @@ def cleandoc(doc):
881881
882882
Any whitespace that can be uniformly removed from the second line
883883
onwards is removed."""
884-
try:
885-
lines = doc.expandtabs().split('\n')
886-
except UnicodeError:
887-
return None
888-
else:
889-
# Find minimum indentation of any non-blank lines after first line.
890-
margin = sys.maxsize
891-
for line in lines[1:]:
892-
content = len(line.lstrip())
893-
if content:
894-
indent = len(line) - content
895-
margin = min(margin, indent)
896-
# Remove indentation.
897-
if lines:
898-
lines[0] = lines[0].lstrip()
899-
if margin < sys.maxsize:
900-
for i in range(1, len(lines)): lines[i] = lines[i][margin:]
901-
# Remove any trailing or leading blank lines.
902-
while lines and not lines[-1]:
903-
lines.pop()
904-
while lines and not lines[0]:
905-
lines.pop(0)
906-
return '\n'.join(lines)
884+
lines = doc.expandtabs().split('\n')
885+
886+
# Find minimum indentation of any non-blank lines after first line.
887+
margin = sys.maxsize
888+
for line in lines[1:]:
889+
content = len(line.lstrip(' '))
890+
if content:
891+
indent = len(line) - content
892+
margin = min(margin, indent)
893+
# Remove indentation.
894+
if lines:
895+
lines[0] = lines[0].lstrip(' ')
896+
if margin < sys.maxsize:
897+
for i in range(1, len(lines)):
898+
lines[i] = lines[i][margin:]
899+
# Remove any trailing or leading blank lines.
900+
while lines and not lines[-1]:
901+
lines.pop()
902+
while lines and not lines[0]:
903+
lines.pop(0)
904+
return '\n'.join(lines)
905+
907906

908907
def getfile(object):
909908
"""Work out which source or compiled file an object was defined in."""

Lib/test/test_doctest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,14 +1287,14 @@ def optionflags(): r"""
12871287
treated as equal:
12881288
12891289
>>> def f(x):
1290-
... '>>> print(1, 2, 3)\n 1 2\n 3'
1290+
... '\n>>> print(1, 2, 3)\n 1 2\n 3'
12911291
12921292
>>> # Without the flag:
12931293
>>> test = doctest.DocTestFinder().find(f)[0]
12941294
>>> doctest.DocTestRunner(verbose=False).run(test)
12951295
... # doctest: +ELLIPSIS
12961296
**********************************************************************
1297-
File ..., line 2, in f
1297+
File ..., line 3, in f
12981298
Failed example:
12991299
print(1, 2, 3)
13001300
Expected:

Lib/test/test_inspect.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -596,9 +596,40 @@ def test_finddoc(self):
596596
self.assertEqual(finddoc(int.from_bytes), int.from_bytes.__doc__)
597597
self.assertEqual(finddoc(int.real), int.real.__doc__)
598598

599+
cleandoc_testdata = [
600+
# first line should have different margin
601+
(' An\n indented\n docstring.', 'An\nindented\n docstring.'),
602+
# trailing whitespace is not removed.
603+
(' An \n \n indented \n docstring. ',
604+
'An \n \nindented \n docstring. '),
605+
# NUL is not termination.
606+
('doc\0string\n\n second\0line\n third\0line\0',
607+
'doc\0string\n\nsecond\0line\nthird\0line\0'),
608+
# first line is lstrip()-ped. other lines are kept when no margin.
609+
(' ', ''),
610+
# compiler.cleandoc() doesn't strip leading/trailing newlines
611+
# to keep maximum backward compatibility.
612+
# inspect.cleandoc() removes them.
613+
('\n\n\n first paragraph\n\n second paragraph\n\n',
614+
'\n\n\nfirst paragraph\n\n second paragraph\n\n'),
615+
(' \n \n \n ', '\n \n \n '),
616+
]
617+
599618
def test_cleandoc(self):
600-
self.assertEqual(inspect.cleandoc('An\n indented\n docstring.'),
601-
'An\nindented\ndocstring.')
619+
func = inspect.cleandoc
620+
for i, (input, expected) in enumerate(self.cleandoc_testdata):
621+
# only inspect.cleandoc() strips \n
622+
expected = expected.strip('\n')
623+
with self.subTest(i=i):
624+
self.assertEqual(func(input), expected)
625+
626+
@cpython_only
def test_c_cleandoc(self):
    # Run the shared cleandoc table through the C implementation that
    # _testinternalcapi exposes.  Unlike test_cleandoc, the expected
    # values are compared as-is (no stripping of '\n').
    import _testinternalcapi
    clean = _testinternalcapi.compiler_cleandoc
    for index, case in enumerate(self.cleandoc_testdata):
        raw, want = case
        with self.subTest(i=index):
            self.assertEqual(clean(raw), want)
602633

603634
def test_getcomments(self):
604635
self.assertEqual(inspect.getcomments(mod), '# line 1\n')
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The compiler now strips indents from docstrings. This reduces ``pyc`` file size by about 5%
2+
when the module is heavily documented. This change affects ``__doc__``, so
3+
tools like doctest will be affected.

Modules/_testinternalcapi.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
1616
#include "pycore_bitutils.h" // _Py_bswap32()
1717
#include "pycore_bytesobject.h" // _PyBytes_Find()
18-
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble
18+
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble, _PyCompile_CleanDoc
1919
#include "pycore_ceval.h" // _PyEval_AddPendingCall
2020
#include "pycore_fileutils.h" // _Py_normpath
2121
#include "pycore_frame.h" // _PyInterpreterFrame
@@ -704,6 +704,23 @@ set_eval_frame_record(PyObject *self, PyObject *list)
704704
Py_RETURN_NONE;
705705
}
706706

707+
/*[clinic input]
708+
709+
_testinternalcapi.compiler_cleandoc -> object
710+
711+
doc: unicode
712+
713+
C implementation of inspect.cleandoc().
714+
[clinic start generated code]*/
715+
716+
static PyObject *
_testinternalcapi_compiler_cleandoc_impl(PyObject *module, PyObject *doc)
/*[clinic end generated code: output=2dd203a80feff5bc input=2de03fab931d9cdc]*/
{
    /* Test-only shim: hand the string straight to the compiler's docstring
       cleaner and pass its result (NULL when the cleaner fails) back. */
    PyObject *cleaned = _PyCompile_CleanDoc(doc);
    return cleaned;
}
722+
723+
707724
/*[clinic input]
708725
709726
_testinternalcapi.compiler_codegen -> object
@@ -1448,6 +1465,7 @@ static PyMethodDef module_functions[] = {
14481465
{"DecodeLocaleEx", decode_locale_ex, METH_VARARGS},
14491466
{"set_eval_frame_default", set_eval_frame_default, METH_NOARGS, NULL},
14501467
{"set_eval_frame_record", set_eval_frame_record, METH_O, NULL},
1468+
_TESTINTERNALCAPI_COMPILER_CLEANDOC_METHODDEF
14511469
_TESTINTERNALCAPI_COMPILER_CODEGEN_METHODDEF
14521470
_TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF
14531471
_TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF

Modules/clinic/_testinternalcapi.c.h

Lines changed: 60 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/compile.c

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1704,10 +1704,16 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts)
17041704
if (c->c_optimize < 2) {
17051705
docstring = _PyAST_GetDocString(stmts);
17061706
if (docstring) {
1707+
PyObject *cleandoc = _PyCompile_CleanDoc(docstring);
1708+
if (cleandoc == NULL) {
1709+
return ERROR;
1710+
}
17071711
i = 1;
17081712
st = (stmt_ty)asdl_seq_GET(stmts, 0);
17091713
assert(st->kind == Expr_kind);
1710-
VISIT(c, expr, st->v.Expr.value);
1714+
location loc = LOC(st->v.Expr.value);
1715+
ADDOP_LOAD_CONST(c, loc, cleandoc);
1716+
Py_DECREF(cleandoc);
17111717
RETURN_IF_ERROR(compiler_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store));
17121718
}
17131719
}
@@ -2252,11 +2258,19 @@ compiler_function_body(struct compiler *c, stmt_ty s, int is_async, Py_ssize_t f
22522258
/* if not -OO mode, add docstring */
22532259
if (c->c_optimize < 2) {
22542260
docstring = _PyAST_GetDocString(body);
2261+
if (docstring) {
2262+
docstring = _PyCompile_CleanDoc(docstring);
2263+
if (docstring == NULL) {
2264+
compiler_exit_scope(c);
2265+
return ERROR;
2266+
}
2267+
}
22552268
}
22562269
if (compiler_add_const(c->c_const_cache, c->u, docstring ? docstring : Py_None) < 0) {
22572270
compiler_exit_scope(c);
22582271
return ERROR;
22592272
}
2273+
Py_XDECREF(docstring);
22602274

22612275
c->u->u_metadata.u_argcount = asdl_seq_LEN(args->args);
22622276
c->u->u_metadata.u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
@@ -7967,6 +7981,89 @@ cfg_to_instructions(cfg_builder *g)
79677981
return NULL;
79687982
}
79697983

7984+
// C implementation of inspect.cleandoc()
7985+
//
7986+
// Difference from inspect.cleandoc():
7987+
// - Do not remove leading and trailing blank lines to keep lineno.
7988+
PyObject *
7989+
_PyCompile_CleanDoc(PyObject *doc)
7990+
{
7991+
doc = PyObject_CallMethod(doc, "expandtabs", NULL);
7992+
if (doc == NULL) {
7993+
return NULL;
7994+
}
7995+
7996+
Py_ssize_t doc_size;
7997+
const char *doc_utf8 = PyUnicode_AsUTF8AndSize(doc, &doc_size);
7998+
if (doc_utf8 == NULL) {
7999+
Py_DECREF(doc);
8000+
return NULL;
8001+
}
8002+
const char *p = doc_utf8;
8003+
const char *pend = p + doc_size;
8004+
8005+
// First pass: find minimum indentation of any non-blank lines
8006+
// after first line.
8007+
while (p < pend && *p++ != '\n') {
8008+
}
8009+
8010+
Py_ssize_t margin = PY_SSIZE_T_MAX;
8011+
while (p < pend) {
8012+
const char *s = p;
8013+
while (*p == ' ') p++;
8014+
if (p < pend && *p != '\n') {
8015+
margin = Py_MIN(margin, p - s);
8016+
}
8017+
while (p < pend && *p++ != '\n') {
8018+
}
8019+
}
8020+
if (margin == PY_SSIZE_T_MAX) {
8021+
margin = 0;
8022+
}
8023+
8024+
// Second pass: write cleandoc into buff.
8025+
8026+
// copy first line without leading spaces.
8027+
p = doc_utf8;
8028+
while (*p == ' ') {
8029+
p++;
8030+
}
8031+
if (p == doc_utf8 && margin == 0 ) {
8032+
// doc is already clean.
8033+
return doc;
8034+
}
8035+
8036+
char *buff = PyMem_Malloc(doc_size);
8037+
char *w = buff;
8038+
8039+
while (p < pend) {
8040+
int ch = *w++ = *p++;
8041+
if (ch == '\n') {
8042+
break;
8043+
}
8044+
}
8045+
8046+
// copy subsequent lines without margin.
8047+
while (p < pend) {
8048+
for (Py_ssize_t i = 0; i < margin; i++, p++) {
8049+
if (*p != ' ') {
8050+
assert(*p == '\n' || *p == '\0');
8051+
break;
8052+
}
8053+
}
8054+
while (p < pend) {
8055+
int ch = *w++ = *p++;
8056+
if (ch == '\n') {
8057+
break;
8058+
}
8059+
}
8060+
}
8061+
8062+
Py_DECREF(doc);
8063+
return PyUnicode_FromStringAndSize(buff, w - buff);
8064+
}
8065+
8066+
79708067
PyObject *
79718068
_PyCompile_CodeGen(PyObject *ast, PyObject *filename, PyCompilerFlags *pflags,
79728069
int optimize, int compile_mode)

0 commit comments

Comments
 (0)
0