bpo-40688: Use the correct parser in the peg_generator scripts (GH-20… · python/cpython@9645930 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9645930

Browse files
authored
bpo-40688: Use the correct parser in the peg_generator scripts (GH-20235)
The scripts in `Tools/peg_generator/scripts` mostly assume that `ast.parse` and `compile` use the old parser, since this was the state of things, while we were developing them. They need to be updated to always use the correct parser. `_peg_parser` is being extended to support both parsing and compiling with both parsers.
1 parent 4483253 commit 9645930

File tree

6 files changed

+136
-172
lines changed

6 files changed

+136
-172
lines changed

Modules/_peg_parser.c

Lines changed: 81 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -1,104 +1,133 @@
11
#include <Python.h>
22
#include "pegen_interface.h"
33

4-
PyObject *
5-
_Py_parse_file(PyObject *self, PyObject *args, PyObject *kwds)
4+
static int
5+
_mode_str_to_int(char *mode_str)
66
{
7-
static char *keywords[] = {"file", "mode", NULL};
8-
char *filename;
9-
char *mode_str = "exec";
10-
11-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", keywords, &filename, &mode_str)) {
12-
return NULL;
13-
}
14-
157
int mode;
168
if (strcmp(mode_str, "exec") == 0) {
179
mode = Py_file_input;
1810
}
11+
else if (strcmp(mode_str, "eval") == 0) {
12+
mode = Py_eval_input;
13+
}
1914
else if (strcmp(mode_str, "single") == 0) {
2015
mode = Py_single_input;
2116
}
2217
else {
23-
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'single'");
18+
mode = -1;
2419
}
20+
return mode;
21+
}
2522

26-
PyArena *arena = PyArena_New();
27-
if (arena == NULL) {
23+
static mod_ty
24+
_run_parser(char *str, char *filename, int mode, PyCompilerFlags *flags, PyArena *arena, int oldparser)
25+
{
26+
mod_ty mod;
27+
if (!oldparser) {
28+
mod = PyPegen_ASTFromString(str, filename, mode, flags, arena);
29+
}
30+
else {
31+
mod = PyParser_ASTFromString(str, filename, mode, flags, arena);
32+
}
33+
return mod;
34+
}
35+
36+
PyObject *
37+
_Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
38+
{
39+
static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
40+
char *the_string;
41+
char *filename = "<string>";
42+
char *mode_str = "exec";
43+
int oldparser = 0;
44+
45+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
46+
&the_string, &filename, &mode_str, &oldparser)) {
2847
return NULL;
2948
}
3049

50+
int mode = _mode_str_to_int(mode_str);
51+
if (mode == -1) {
52+
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
53+
}
54+
3155
PyCompilerFlags flags = _PyCompilerFlags_INIT;
32-
PyObject *result = NULL;
56+
flags.cf_flags = PyCF_IGNORE_COOKIE;
3357

34-
mod_ty res = PyPegen_ASTFromFilename(filename, mode, &flags, arena);
35-
if (res == NULL) {
36-
goto error;
58+
PyArena *arena = PyArena_New();
59+
if (arena == NULL) {
60+
return NULL;
61+
}
62+
63+
mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
64+
if (mod == NULL) {
65+
PyArena_Free(arena);
66+
return NULL;
3767
}
38-
result = PyAST_mod2obj(res);
3968

40-
error:
69+
PyObject *filename_ob = PyUnicode_DecodeFSDefault(filename);
70+
if (filename_ob == NULL) {
71+
PyArena_Free(arena);
72+
return NULL;
73+
}
74+
PyCodeObject *result = PyAST_CompileObject(mod, filename_ob, &flags, -1, arena);
75+
Py_XDECREF(filename_ob);
4176
PyArena_Free(arena);
42-
return result;
77+
return (PyObject *)result;
4378
}
4479

4580
PyObject *
4681
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
4782
{
48-
static char *keywords[] = {"string", "mode", "oldparser", NULL};
83+
static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
4984
char *the_string;
85+
char *filename = "<string>";
5086
char *mode_str = "exec";
5187
int oldparser = 0;
5288

53-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sp", keywords,
54-
&the_string, &mode_str, &oldparser)) {
89+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
90+
&the_string, &filename, &mode_str, &oldparser)) {
5591
return NULL;
5692
}
5793

58-
int mode;
59-
if (strcmp(mode_str, "exec") == 0) {
60-
mode = Py_file_input;
61-
}
62-
else if (strcmp(mode_str, "eval") == 0) {
63-
mode = Py_eval_input;
64-
}
65-
else if (strcmp(mode_str, "single") == 0) {
66-
mode = Py_single_input;
67-
}
68-
else {
94+
int mode = _mode_str_to_int(mode_str);
95+
if (mode == -1) {
6996
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
7097
}
7198

99+
PyCompilerFlags flags = _PyCompilerFlags_INIT;
100+
flags.cf_flags = PyCF_IGNORE_COOKIE;
101+
72102
PyArena *arena = PyArena_New();
73103
if (arena == NULL) {
74104
return NULL;
75105
}
76106

77-
PyObject *result = NULL;
78-
79-
PyCompilerFlags flags = _PyCompilerFlags_INIT;
80-
flags.cf_flags = PyCF_IGNORE_COOKIE;
81-
82-
mod_ty res;
83-
if (oldparser) {
84-
res = PyParser_ASTFromString(the_string, "<string>", mode, &flags, arena);
85-
}
86-
else {
87-
res = PyPegen_ASTFromString(the_string, "<string>", mode, &flags, arena);
88-
}
89-
if (res == NULL) {
90-
goto error;
107+
mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
108+
if (mod == NULL) {
109+
PyArena_Free(arena);
110+
return NULL;
91111
}
92-
result = PyAST_mod2obj(res);
93112

94-
error:
113+
PyObject *result = PyAST_mod2obj(mod);
95114
PyArena_Free(arena);
96115
return result;
97116
}
98117

99118
static PyMethodDef ParseMethods[] = {
100-
{"parse_file", (PyCFunction)(void (*)(void))_Py_parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."},
101-
{"parse_string", (PyCFunction)(void (*)(void))_Py_parse_string, METH_VARARGS|METH_KEYWORDS,"Parse a string."},
119+
{
120+
"parse_string",
121+
(PyCFunction)(void (*)(void))_Py_parse_string,
122+
METH_VARARGS|METH_KEYWORDS,
123+
"Parse a string, return an AST."
124+
},
125+
{
126+
"compile_string",
127+
(PyCFunction)(void (*)(void))_Py_compile_string,
128+
METH_VARARGS|METH_KEYWORDS,
129+
"Compile a string, return a code object."
130+
},
102131
{NULL, NULL, 0, NULL} /* Sentinel */
103132
};
104133

Tools/peg_generator/Makefile

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,25 +69,22 @@ stats: peg_extension/parse.c data/xxl.py
6969

7070
time: time_compile
7171

72-
time_compile: venv peg_extension/parse.c data/xxl.py
72+
time_compile: venv data/xxl.py
7373
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
7474

75-
time_parse: venv peg_extension/parse.c data/xxl.py
75+
time_parse: venv data/xxl.py
7676
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
7777

78-
time_check: venv peg_extension/parse.c data/xxl.py
79-
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl check
78+
time_old: time_old_compile
8079

81-
time_stdlib: time_stdlib_compile
82-
83-
time_stdlib_compile: venv peg_extension/parse.c data/xxl.py
80+
time_old_compile: venv data/xxl.py
8481
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
8582

86-
time_stdlib_parse: venv peg_extension/parse.c data/xxl.py
83+
time_old_parse: venv data/xxl.py
8784
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
8885

89-
test_local:
90-
$(PYTHON) scripts/test_parse_directory.py \
86+
time_peg_dir: venv
87+
$(VENVPYTHON) scripts/test_parse_directory.py \
9188
--grammar-file $(GRAMMAR) \
9289
--tokens-file $(TOKENS) \
9390
-d $(TESTDIR) \
@@ -96,8 +93,8 @@ test_local:
9693
--exclude "*/failset/**" \
9794
--exclude "*/failset/**/*"
9895

99-
test_global: $(CPYTHON)
100-
$(PYTHON) scripts/test_parse_directory.py \
96+
time_stdlib: $(CPYTHON) venv
97+
$(VENVPYTHON) scripts/test_parse_directory.py \
10198
--grammar-file $(GRAMMAR) \
10299
--tokens-file $(TOKENS) \
103100
-d $(CPYTHON) \
@@ -113,9 +110,6 @@ mypy: regen-metaparser
113110
format-python:
114111
black pegen scripts
115112

116-
bench: venv
117-
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=stdlib check
118-
119113
format: format-python
120114

121115
find_max_nesting:

Tools/peg_generator/scripts/benchmark.py

Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import os
77
from time import time
88

9+
import _peg_parser
10+
911
try:
1012
import memory_profiler
1113
except ModuleNotFoundError:
@@ -14,8 +16,6 @@
1416
sys.exit(1)
1517

1618
sys.path.insert(0, os.getcwd())
17-
from peg_extension import parse
18-
from pegen.build import build_c_parser_and_generator
1919
from scripts.test_parse_directory import parse_directory
2020

2121
argparser = argparse.ArgumentParser(
@@ -41,9 +41,6 @@
4141
"compile", help="Benchmark parsing and compiling to bytecode"
4242
)
4343
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
44-
command_check = subcommands.add_parser(
45-
"check", help="Benchmark parsing and throwing the tree away"
46-
)
4744

4845

4946
def benchmark(func):
@@ -66,55 +63,41 @@ def wrapper(*args):
6663
@benchmark
6764
def time_compile(source, parser):
6865
if parser == "cpython":
69-
return compile(source, os.path.join("data", "xxl.py"), "exec")
66+
return _peg_parser.compile_string(
67+
source,
68+
oldparser=True,
69+
)
7070
else:
71-
return parse.parse_string(source, mode=2)
71+
return _peg_parser.compile_string(source)
7272

7373

7474
@benchmark
7575
def time_parse(source, parser):
7676
if parser == "cpython":
77-
return ast.parse(source, os.path.join("data", "xxl.py"), "exec")
77+
return _peg_parser.parse_string(source, oldparser=True)
7878
else:
79-
return parse.parse_string(source, mode=1)
80-
81-
82-
@benchmark
83-
def time_check(source):
84-
return parse.parse_string(source, mode=0)
79+
return _peg_parser.parse_string(source)
8580

8681

8782
def run_benchmark_xxl(subcommand, parser, source):
8883
if subcommand == "compile":
8984
time_compile(source, parser)
9085
elif subcommand == "parse":
9186
time_parse(source, parser)
92-
elif subcommand == "check":
93-
time_check(source)
9487

9588

9689
def run_benchmark_stdlib(subcommand, parser):
97-
modes = {"compile": 2, "parse": 1, "check": 0}
98-
extension = None
99-
if parser == "pegen":
100-
extension = build_c_parser_and_generator(
101-
"../../Grammar/python.gram",
102-
"../../Grammar/Tokens",
103-
"peg_extension/parse.c",
104-
compile_extension=True,
105-
skip_actions=False,
106-
)
10790
for _ in range(3):
10891
parse_directory(
10992
"../../Lib",
11093
"../../Grammar/python.gram",
94+
"../../Grammar/Tokens",
11195
verbose=False,
11296
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
11397
skip_actions=False,
11498
tree_arg=0,
11599
short=True,
116-
extension=extension,
117-
mode=modes[subcommand],
100+
mode=2 if subcommand == "compile" else 1,
118101
parser=parser,
119102
)
120103

@@ -127,8 +110,6 @@ def main():
127110

128111
if subcommand is None:
129112
argparser.error("A benchmark to run is required")
130-
if subcommand == "check" and parser == "cpython":
131-
argparser.error("Cannot use check target with the CPython parser")
132113

133114
if target == "xxl":
134115
with open(os.path.join("data", "xxl.py"), "r") as f:

Tools/peg_generator/scripts/show_parse.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import sys
3131
import tempfile
3232

33+
import _peg_parser
34+
3335
from typing import List
3436

3537
sys.path.insert(0, os.getcwd())
@@ -72,7 +74,7 @@ def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]:
7274

7375

7476
def show_parse(source: str, verbose: bool = False) -> str:
75-
tree = ast.parse(source)
77+
tree = _peg_parser.parse_string(source, oldparser=True)
7678
return format_tree(tree, verbose).rstrip("\n")
7779

7880

@@ -90,17 +92,11 @@ def main() -> None:
9092
sep = " "
9193
program = sep.join(args.program)
9294
if args.grammar_file:
93-
sys.path.insert(0, os.curdir)
94-
from pegen.build import build_parser_and_generator
95-
96-
build_parser_and_generator(args.grammar_file, "peg_parser/parse.c", compile_extension=True)
97-
from pegen.parse import parse_string # type: ignore[import]
98-
99-
tree = parse_string(program, mode=1)
95+
tree = _peg_parser.parse_string(program)
10096

10197
if args.diff:
10298
a = tree
103-
b = ast.parse(program)
99+
b = _peg_parser.parse_string(program, oldparser=True)
104100
diff = diff_trees(a, b, args.verbose)
105101
if diff:
106102
for line in diff:
@@ -111,8 +107,8 @@ def main() -> None:
111107
print(f"# Parsed using {args.grammar_file}")
112108
print(format_tree(tree, args.verbose))
113109
else:
114-
tree = ast.parse(program)
115-
print("# Parse using ast.parse()")
110+
tree = _peg_parser.parse_string(program, oldparser=True)
111+
print("# Parse using the old parser")
116112
print(format_tree(tree, args.verbose))
117113

118114

0 commit comments

Comments
 (0)
0