[3.12] gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520) by miss-islington · Pull Request #128621 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[3.12] gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520) #128621

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions Lib/pydoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class or function within a module or module in a package. If the
# the current directory is changed with os.chdir(), an incorrect
# path will be displayed.

import ast
import __future__
import builtins
import importlib._bootstrap
Expand Down Expand Up @@ -346,21 +347,29 @@ def ispackage(path):
return False

def source_synopsis(file):
    """Return the one-line summary of a file object, if present.

    *file* is an open text-mode file object positioned at the start of
    Python source.  The leading tokens are scanned for a module docstring:
    one or more adjacent string literals, optionally wrapped in
    parentheses, preceded only by comments and blank lines.

    Returns the first line of the docstring with surrounding whitespace
    removed, or None when the source does not start with a plain ``str``
    literal (bytes, f-strings, expressions, other statements), or when the
    source cannot be tokenized, decoded, or evaluated as a literal.
    """
    # Source text of the candidate docstring expression, piece by piece.
    parts = []
    try:
        tokens = tokenize.generate_tokens(file.readline)
        for tok_type, tok_string, _, _, _ in tokens:
            if tok_type == tokenize.STRING:
                parts.append(tok_string)
            elif tok_type == tokenize.NEWLINE:
                # End of the first logical line: evaluate what was collected.
                with warnings.catch_warnings():
                    # Ignore the "invalid escape sequence" warning.
                    warnings.simplefilter("ignore", SyntaxWarning)
                    docstring = ast.literal_eval(''.join(parts))
                if not isinstance(docstring, str):
                    return None
                return docstring.strip().split('\n')[0].strip()
            elif tok_type == tokenize.OP and tok_string in ('(', ')'):
                # Keep parentheses so parenthesized docstrings still evaluate.
                parts.append(tok_string)
            elif tok_type not in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
                # Anything else means the file does not start with a docstring.
                return None
    except (tokenize.TokenError, SyntaxError, ValueError):
        # ValueError covers UnicodeDecodeError raised while reading the file
        # as well as the "malformed node" error that ast.literal_eval()
        # raises for valid syntax that is not a literal, e.g. ("a")("b").
        return None
    return None

def synopsis(filename, cache={}):
"""Get the one-line summary out of a module file."""
Expand Down
77 changes: 77 additions & 0 deletions Lib/test/test_pydoc/test_pydoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import contextlib
import importlib.util
import inspect
import io
import pydoc
import py_compile
import keyword
Expand Down Expand Up @@ -841,6 +842,82 @@ def test_synopsis(self):
synopsis = pydoc.synopsis(TESTFN, {})
self.assertEqual(synopsis, 'line 1: h\xe9')

def test_source_synopsis(self):
def check(source, expected, encoding=None):
if isinstance(source, str):
source_file = StringIO(source)
else:
source_file = io.TextIOWrapper(io.BytesIO(source), encoding=encoding)
with source_file:
result = pydoc.source_synopsis(source_file)
self.assertEqual(result, expected)

check('"""Single line docstring."""',
'Single line docstring.')
check('"""First line of docstring.\nSecond line.\nThird line."""',
'First line of docstring.')
check('"""First line of docstring.\\nSecond line.\\nThird line."""',
'First line of docstring.')
check('""" Whitespace around docstring. """',
'Whitespace around docstring.')
check('import sys\n"""No docstring"""',
None)
check(' \n"""Docstring after empty line."""',
'Docstring after empty line.')
check('# Comment\n"""Docstring after comment."""',
'Docstring after comment.')
check(' # Indented comment\n"""Docstring after comment."""',
'Docstring after comment.')
check('""""""', # Empty docstring
'')
check('', # Empty file
None)
check('"""Embedded\0null byte"""',
None)
check('"""Embedded null byte"""\0',
None)
check('"""Café and résumé."""',
'Café and résumé.')
check("'''Triple single quotes'''",
'Triple single quotes')
check('"Single double quotes"',
'Single double quotes')
check("'Single single quotes'",
'Single single quotes')
check('"""split\\\nline"""',
'splitline')
check('"""Unrecognized escape \\sequence"""',
'Unrecognized escape \\sequence')
check('"""Invalid escape seq\\uence"""',
None)
check('r"""Raw \\stri\\ng"""',
'Raw \\stri\\ng')
check('b"""Bytes literal"""',
None)
check('f"""f-string"""',
None)
check('"""Concatenated""" \\\n"string" \'literals\'',
'Concatenatedstringliterals')
check('"""String""" + """expression"""',
None)
check('("""In parentheses""")',
'In parentheses')
check('("""Multiple lines """\n"""in parentheses""")',
'Multiple lines in parentheses')
check('()', # tuple
None)
check(b'# coding: iso-8859-15\n"""\xa4uro sign"""',
'€uro sign', encoding='iso-8859-15')
check(b'"""\xa4"""', # Decoding error
None, encoding='utf-8')

with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as temp_file:
temp_file.write('"""Real file test."""\n')
temp_file.flush()
temp_file.seek(0)
result = pydoc.source_synopsis(temp_file)
self.assertEqual(result, "Real file test.")

@requires_docstrings
def test_synopsis_sourceless(self):
os = import_helper.import_fresh_module('os')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
It now supports docstrings with single quotes, escape sequences,
raw string literals, and other Python syntax.
Loading
0