[3.12] gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520) (GH-128621) · python/cpython@b69b9da · GitHub
[go: up one dir, main page]

Skip to content

Commit b69b9da

Browse files
miss-islington, srinivasreddy, merwok, and serhiy-storchaka authored
[3.12] gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520) (GH-128621)
It now supports docstrings with single quotes, escape sequences, raw string literals, and other Python syntax.

(cherry picked from commit 474e419)

Co-authored-by: Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్ రెడ్డి) <thatiparthysreenivas@gmail.com>
Co-authored-by: Éric <merwok@netwok.org>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent d8890fb commit b69b9da

File tree

3 files changed: +104 −15 lines changed

Lib/pydoc.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class or function within a module or module in a package. If the
5454
# the current directory is changed with os.chdir(), an incorrect
5555
# path will be displayed.
5656

57+
import ast
5758
import __future__
5859
import builtins
5960
import importlib._bootstrap
@@ -346,21 +347,29 @@ def ispackage(path):
346347
return False
347348

348349
def source_synopsis(file):
349-
line = file.readline()
350-
while line[:1] == '#' or not line.strip():
351-
line = file.readline()
352-
if not line: break
353-
line = line.strip()
354-
if line[:4] == 'r"""': line = line[1:]
355-
if line[:3] == '"""':
356-
line = line[3:]
357-
if line[-1:] == '\\': line = line[:-1]
358-
while not line.strip():
359-
line = file.readline()
360-
if not line: break
361-
result = line.split('"""')[0].strip()
362-
else: result = None
363-
return result
350+
"""Return the one-line summary of a file object, if present"""
351+
352+
string = ''
353+
try:
354+
tokens = tokenize.generate_tokens(file.readline)
355+
for tok_type, tok_string, _, _, _ in tokens:
356+
if tok_type == tokenize.STRING:
357+
string += tok_string
358+
elif tok_type == tokenize.NEWLINE:
359+
with warnings.catch_warnings():
360+
# Ignore the "invalid escape sequence" warning.
361+
warnings.simplefilter("ignore", SyntaxWarning)
362+
docstring = ast.literal_eval(string)
363+
if not isinstance(docstring, str):
364+
return None
365+
return docstring.strip().split('\n')[0].strip()
366+
elif tok_type == tokenize.OP and tok_string in ('(', ')'):
367+
string += tok_string
368+
elif tok_type not in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
369+
return None
370+
except (tokenize.TokenError, UnicodeDecodeError, SyntaxError):
371+
return None
372+
return None
364373

365374
def synopsis(filename, cache={}):
366375
"""Get the one-line summary out of a module file."""

Lib/test/test_pydoc/test_pydoc.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import contextlib
44
import importlib.util
55
import inspect
6+
import io
67
import pydoc
78
import py_compile
89
import keyword
@@ -841,6 +842,82 @@ def test_synopsis(self):
841842
synopsis = pydoc.synopsis(TESTFN, {})
842843
self.assertEqual(synopsis, 'line 1: h\xe9')
843844

845+
def test_source_synopsis(self):
846+
def check(source, expected, encoding=None):
847+
if isinstance(source, str):
848+
source_file = StringIO(source)
849+
else:
850+
source_file = io.TextIOWrapper(io.BytesIO(source), encoding=encoding)
851+
with source_file:
852+
result = pydoc.source_synopsis(source_file)
853+
self.assertEqual(result, expected)
854+
855+
check('"""Single line docstring."""',
856+
'Single line docstring.')
857+
check('"""First line of docstring.\nSecond line.\nThird line."""',
858+
'First line of docstring.')
859+
check('"""First line of docstring.\\nSecond line.\\nThird line."""',
860+
'First line of docstring.')
861+
check('""" Whitespace around docstring. """',
862+
'Whitespace around docstring.')
863+
check('import sys\n"""No docstring"""',
864+
None)
865+
check(' \n"""Docstring after empty line."""',
866+
'Docstring after empty line.')
867+
check('# Comment\n"""Docstring after comment."""',
868+
'Docstring after comment.')
869+
check(' # Indented comment\n"""Docstring after comment."""',
870+
'Docstring after comment.')
871+
check('""""""', # Empty docstring
872+
'')
873+
check('', # Empty file
874+
None)
875+
check('"""Embedded\0null byte"""',
876+
None)
877+
check('"""Embedded null byte"""\0',
878+
None)
879+
check('"""Café and résumé."""',
880+
'Café and résumé.')
881+
check("'''Triple single quotes'''",
882+
'Triple single quotes')
883+
check('"Single double quotes"',
884+
'Single double quotes')
885+
check("'Single single quotes'",
886+
'Single single quotes')
887+
check('"""split\\\nline"""',
888+
'splitline')
889+
check('"""Unrecognized escape \\sequence"""',
890+
'Unrecognized escape \\sequence')
891+
check('"""Invalid escape seq\\uence"""',
892+
None)
893+
check('r"""Raw \\stri\\ng"""',
894+
'Raw \\stri\\ng')
895+
check('b"""Bytes literal"""',
896+
None)
897+
check('f"""f-string"""',
898+
None)
899+
check('"""Concatenated""" \\\n"string" \'literals\'',
900+
'Concatenatedstringliterals')
901+
check('"""String""" + """expression"""',
902+
None)
903+
check('("""In parentheses""")',
904+
'In parentheses')
905+
check('("""Multiple lines """\n"""in parentheses""")',
906+
'Multiple lines in parentheses')
907+
check('()', # tuple
908+
None)
909+
check(b'# coding: iso-8859-15\n"""\xa4uro sign"""',
910+
'€uro sign', encoding='iso-8859-15')
911+
check(b'"""\xa4"""', # Decoding error
912+
None, encoding='utf-8')
913+
914+
with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as temp_file:
915+
temp_file.write('"""Real file test."""\n')
916+
temp_file.flush()
917+
temp_file.seek(0)
918+
result = pydoc.source_synopsis(temp_file)
919+
self.assertEqual(result, "Real file test.")
920+
844921
@requires_docstrings
845922
def test_synopsis_sourceless(self):
846923
os = import_helper.import_fresh_module('os')
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
2+
It now supports docstrings with single quotes, escape sequences,
3+
raw string literals, and other Python syntax.

0 commit comments

Comments
 (0)
0