10000 gh-115077: Argument Clinic: generate better error messages when parsing function declaration by erlend-aasland · Pull Request #115555 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-115077: Argument Clinic: generate better error messages when parsing function declaration #115555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 81 additions & 30 deletions Lib/test/test_clinic.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,18 +591,6 @@ class C "void *" ""
err = "'__new__' must be a class method"
self.expect_failure(block, err, lineno=7)

def test_no_c_basename_cloned(self):
block = """
/*[clinic input]
foo2
[clinic start generated code]*/
/*[clinic input]
foo as = foo2
[clinic start generated code]*/
"""
err = "No C basename provided after 'as' keyword"
self.expect_failure(block, err, lineno=5)

def test_cloned_with_custom_c_basename(self):
raw = dedent("""
/*[clinic input]
Expand Down Expand Up @@ -1203,6 +1191,33 @@ def test_return_converter(self):
""")
self.assertIsInstance(function.return_converter, clinic.int_return_converter)

def test_return_converter_with_custom_c_name(self):
function = self.parse_function("""
module os
bar as foo -> int
""")
self.assertIsInstance(function.return_converter, clinic.int_return_converter)
self.assertEqual(function.name, "bar")
self.assertEqual(function.c_basename, "foo")

def test_return_converter_with_arg(self):
function = self.parse_function("""
module os
os.stat -> int(py_default=None)
""")
self.assertIsInstance(function.return_converter, clinic.int_return_converter)
self.assertEqual(function.return_converter.py_default, None)

def test_return_converter_with_arg_and_custom_c_name(self):
function = self.parse_function("""
module os
bar as foo -> int(py_default=None)
""")
self.assertIsInstance(function.return_converter, clinic.int_return_converter)
self.assertEqual(function.return_converter.py_default, None)
self.assertEqual(function.name, "bar")
self.assertEqual(function.c_basename, "foo")

def test_return_converter_invalid_syntax(self):
block = """
module os
Expand Down Expand Up @@ -1535,28 +1550,64 @@ class foo.Bar "unused" "notneeded"
# but it *is* a parameter
self.assertEqual(1, len(function.parameters))

def test_illegal_module_line(self):
block = """
module foo
foo.bar => int
/
"""
err = "Illegal function name: 'foo.bar => int'"
self.expect_failure(block, err)
def test_invalid_syntax(self):
err = "Invalid syntax"
for block in (
"foo = bar baz",
"a b c d",
"foo as baz = bar ->",
"foo as bar bar = baz",
):
with self.subTest(block=block):
self.expect_failure(block, err)

def test_illegal_c_basename(self):
block = """
module foo
foo.bar as 935
/
"""
err = "Illegal C basename: '935'"
self.expect_failure(block, err)
err = "Illegal C basename"
for block in (
"foo as 935",
"foo as ''",
"foo as a.c",
):
with self.subTest(block=block):
self.expect_failure(block, err)

def test_illegal_cloned_name(self):
err = "Illegal source function name"
for block in (
"foo = 935",
"foo = ''",
):
with self.subTest(block=block):
self.expect_failure(block, err)

def test_no_return_annotation(self):
err = "No return annotation provided"
for block in (
"foo ->",
"foo as bar ->",
):
with self.subTest(block=block):
self.expect_failure(block, err)

def test_clone_no_source_function(self):
err = "No source function provided"
for block in (
"foo =",
"foo as bar =",
):
with self.subTest(block=block):
self.expect_failure(block, err)

def test_no_c_basename(self):
block = "foo as "
err = "No C basename provided after 'as' keyword"
self.expect_failure(block, err, strip=False)
err = &q 10000 uot;No C basename provided for 'foo' after 'as' keyword"
for block in (
"foo as",
"foo as ",
"foo as = clone",
"foo as -> int",
):
with self.subTest(block=block):
self.expect_failure(block, err)

def test_single_star(self):
block = """
Expand Down
123 changes: 83 additions & 40 deletions Tools/clinic/clinic.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
Any,
Final,
Literal,
NamedTuple,
NoReturn,
Protocol,
TypeVar,
Expand All @@ -52,6 +51,12 @@
import libclinic
import libclinic.cpp
from libclinic import ClinicError
from libclinic.parser import (
RE_CLONE,
RE_C_BASENAME,
RE_FULLNAME,
RE_RETURNS,
)


# TODO:
Expand Down Expand Up @@ -4757,11 +4762,6 @@ class ParamState(enum.IntEnum):
RIGHT_SQUARE_AFTER = 6


class FunctionNames(NamedTuple):
full_name: str
c_basename: str


class DSLParser:
function: Function | None
state: StateKeeper
Expand Down Expand Up @@ -5047,25 +5047,6 @@ def state_dsl_start(self, line: str) -> None:

self.next(self.state_modulename_name, line)

def parse_function_names(self, line: str) -> FunctionNames:
left, as_, right = line.partition(' as ')
full_name = left.strip()
c_basename = right.strip()
if as_ and not c_basename:
fail("No C basename provided after 'as' keyword")
if not c_basename:
fields = full_name.split(".")
if fields[-1] == '__new__':
fields.pop()
c_basename = "_".join(fields)
if not libclinic.is_legal_py_identifier(full_name):
fail(f"Illegal function name: {full_name!r}")
if not libclinic.is_legal_c_identifier(c_basename):
fail(f"Illegal C basename: {c_basename!r}")
names = FunctionNames(full_name=full_name, c_basename=c_basename)
self.normalize_function_kind(names.full_name)
return names

def normalize_function_kind(self, fullname: str) -> None:
# Fetch the method name and possibly class.
fields = fullname.split('.')
Expand All @@ -5089,7 +5070,7 @@ def normalize_function_kind(self, fullname: str) -> None:
self.kind = METHOD_INIT

def resolve_return_converter(
self, full_name: str, forced_converter: str
self, full_name: str, forced_converter: str | None
) -> CReturnConverter:
if forced_converter:
if self.kind in {GETTER, SETTER}:
Expand All @@ -5115,8 +5096,12 @@ def resolve_return_converter(
return init_return_converter()
return CReturnConverter()

def parse_cloned_function(self, names: FunctionNames, existing: str) -> None:
full_name, c_basename = names
def parse_cloned_function(
self,
full_name: str,
c_basename: str,
existing: str
) -> None:
fields = [x.strip() for x in existing.split('.')]
function_name = fields.pop()
module, cls = self.clinic._module_and_class(fields)
Expand Down Expand Up @@ -5158,6 +5143,70 @@ def parse_cloned_function(self, names: FunctionNames, existing: str) -> None:
(cls or module).functions.append(function)
self.next(self.state_function_docstring)

@staticmethod
def generate_c_basename(full_name: str) -> str:
fields = full_name.split(".")
if fields[-1] == '__new__':
fields.pop()
return "_".join(fields)

def parse_declaration(
self, line: str
) -> tuple[str, str, str | None, str | None]:
cloned = None
returns = None

def invalid_syntax(msg: str | None = None) -> NoReturn:
preamble = "Invalid syntax"
if msg:
preamble += f" ({msg})"
fail(f"{preamble}: {line!r}\n\n"
"Allowed syntax:\n"
"[module.[submodule.]][class.]func [as c_name] [-> return_annotation]\n\n"
"Nested submodules are allowed.")

m = RE_FULLNAME.match(line)
assert m
full_name = m[1]
if not libclinic.is_legal_py_identifier(full_name):
fail(f"Illegal function name: {full_name!r}")
pos = m.end()

m = RE_C_BASENAME.match(line, pos)
if m:
if not m[1]:
fail(f"No C basename provided for {full_name!r} after 'as' keyword")
c_basename = m[1]
if not libclinic.is_legal_c_identifier(c_basename):
fail(f"Illegal C basename: {c_basename!r}")
pos = m.end()
else:
c_basename = self.generate_c_basename(full_name)

m = RE_CLONE.match(line, pos)
if m:
if not m[1]:
fail(f"No source function provided for {full_name!r} after '=' keyword")
cloned = m[1]
if not libclinic.is_legal_py_identifier(cloned):
fail(f"Illegal source function name: {cloned!r}")
pos = m.end()

m = RE_RETURNS.match(line, pos)
if m:
if cloned:
invalid_syntax()
if not m[1]:
fail(f"No return annotation provided for {full_name!r} after '->' keyword")
returns = m[1].strip()
pos = m.end()

if pos != len(line):
invalid_syntax()

self.normalize_function_kind(full_name)
return full_name, c_basename, cloned, returns

def state_modulename_name(self, line: str) -> None:
# looking for declaration, which establishes the leftmost column
# line should be
Expand All @@ -5178,20 +5227,14 @@ def state_modulename_name(self, line: str) -> None:
assert self.valid_line(line)
self.indent.infer(line)

# are we cloning?
before, equals, existing = line.rpartition('=')
if equals:
existing = existing.strip()
if libclinic.is_legal_py_identifier(existing):
# we're cloning!
names = self.parse_function_names(before)
return self.parse_cloned_function(names, existing)

line, _, returns = line.partition('->')
returns = returns.strip()
full_name, c_basename = self.parse_function_names(line)
full_name, c_basename, cloned, returns = self.parse_declaration(line)

if cloned:
return self.parse_cloned_function(full_name, c_basename, cloned)

return_converter = self.resolve_return_converter(full_name, returns)

# Split out function name, and determine module and class.
fields = [x.strip() for x in full_name.split('.')]
function_name = fields.pop()
module, cls = self.clinic._module_and_class(fields)
Expand Down
7 changes: 7 additions & 0 deletions Tools/clinic/libclinic/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import re


RE_FULLNAME = re.compile(r"\s*([\w.]+)\s*")
RE_C_BASENAME = re.compile(r"\bas\b\s*(?:([^-=\s]+)\s*)?")
RE_CLONE = re.compile(r"=\s*(?:([^-=\s]+)\s*)?")
Comment on lines +5 to +6
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wrote it pass most of your tests, but perhaps \w+ or [\w.]+ is better than [^-=\s]+. It will produce different error message for foo.bar as '', but it may be for good.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, my test case might also be too contrived.

RE_RETURNS = re.compile(r"->\s*(.*)")
0