gh-104169: Fix test_peg_generator after tokenizer refactoring by lysnikolaou · Pull Request #110727 · python/cpython


Merged
2 changes: 1 addition & 1 deletion Lib/test/test_exceptions.py
@@ -253,7 +253,7 @@ def testSyntaxErrorOffset(self):
check('try:\n pass\nexcept*:\n pass', 3, 8)
check('try:\n pass\nexcept*:\n pass\nexcept* ValueError:\n pass', 3, 8)

-# Errors thrown by tokenizer.c
+# Errors thrown by the tokenizer
check('(0x+1)', 1, 3)
check('x = 0xI', 1, 6)
check('0010 + 2', 1, 1)
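
A quick standalone check (not part of the test-suite) shows where those offsets come from; the values printed match check('(0x+1)', 1, 3) above:

try:
    compile('(0x+1)', '<string>', 'exec')
except SyntaxError as e:
    # The tokenizer reports the bad hexadecimal literal at line 1, offset 3.
    print(e.lineno, e.offset)
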
2 changes: 1 addition & 1 deletion Lib/test/test_source_encoding.py
@@ -255,7 +255,7 @@ class UTF8ValidatorTest(unittest.TestCase):
def test_invalid_utf8(self):
# This is a port of test_utf8_decode_invalid_sequences in
# test_unicode.py to exercise the separate utf8 validator in
-# Parser/tokenizer.c used when reading source files.
+# Parser/tokenizer/helpers.c used when reading source files.

# That file is written using low-level C file I/O, so the only way to
# test it is to write actual files to disk.
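
A minimal sketch of that approach, assuming only that CPython rejects source files containing invalid UTF-8 with a SyntaxError:

import os
import subprocess
import sys
import tempfile

# 0xFF can never appear in well-formed UTF-8, so the validator that reads
# source files rejects this script before it is ever parsed.
with tempfile.NamedTemporaryFile("wb", suffix=".py", delete=False) as f:
    f.write(b"x = '\xff'\n")
    path = f.name
try:
    proc = subprocess.run([sys.executable, path], capture_output=True, text=True)
    print("SyntaxError" in proc.stderr)  # expected: True
finally:
    os.unlink(path)
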
4 changes: 2 additions & 2 deletions Lib/test/test_tokenize.py
@@ -1435,7 +1435,7 @@ def test_cookie_second_line_empty_first_line(self):
self.assertEqual(consumed_lines, expected)

def test_latin1_normalization(self):
-# See get_normal_name() in tokenizer.c.
+# See get_normal_name() in Parser/tokenizer/helpers.c.
encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
"iso-8859-1-unix", "iso-latin-1-mac")
for encoding in encodings:
@@ -1460,7 +1460,7 @@ def test_syntaxerror_latin1(self):


def test_utf8_normalization(self):
-# See get_normal_name() in tokenizer.c.
+# See get_normal_name() in Parser/tokenizer/helpers.c.
encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
for encoding in encodings:
for rep in ("-", "_"):
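
Both tests reach get_normal_name() through tokenize.detect_encoding. A short illustration of the normalization they assert, with cookies drawn from the encoding lists above:

import io
from tokenize import detect_encoding

# Spelling variants of a coding cookie collapse to the canonical codec name:
# "latin-1-unix" becomes "iso-8859-1" and "utf-8-mac" becomes "utf-8".
for cookie, expected in [("latin-1-unix", "iso-8859-1"), ("utf-8-mac", "utf-8")]:
    src = f"# -*- coding: {cookie} -*-\nx = 1\n".encode("ascii")
    enc, _ = detect_encoding(io.BytesIO(src).readline)
    print(enc == expected)  # True
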
2 changes: 1 addition & 1 deletion Lib/tokenize.py
@@ -298,7 +298,7 @@ def untokenize(iterable):


def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
"""Imitates get_normal_name in Parser/tokenizer/helpers.c."""
# Only care about the first 12 characters.
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):
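
Calling the private helper directly (an illustration only, not a public API) shows the behavior the docstring refers to:

from tokenize import _get_normal_name  # private helper defined above

print(_get_normal_name("UTF_8"))                   # utf-8
print(_get_normal_name("Latin-1"))                 # iso-8859-1
print(_get_normal_name("utf-8-very-long-suffix"))  # utf-8 (only the first 12 chars are checked)
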
2 changes: 1 addition & 1 deletion Modules/config.c.in
@@ -45,7 +45,7 @@ struct _inittab _PyImport_Inittab[] = {
/* This lives in Python/Python-ast.c */
{"_ast", PyInit__ast},

-/* This lives in Python/Python-tokenizer.c */
+/* This lives in Python/Python-tokenize.c */
{"_tokenize", PyInit__tokenize},

/* These entries are here for sys.builtin_module_names */
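
Because of this inittab entry, _tokenize is compiled into the interpreter itself; a quick check on a build configured this way (an assumption for other configurations):

import sys

# _tokenize (implemented in Python/Python-tokenize.c) is a builtin module,
# so it needs no separate .so/.pyd file on disk.
print("_tokenize" in sys.builtin_module_names)  # expected: True
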
4 changes: 2 additions & 2 deletions Parser/myreadline.c
@@ -1,5 +1,5 @@

-/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c.
+/* Readline interface for the tokenizer and [raw_]input() in bltinmodule.c.
By default, or when stdin is not a tty device, we have a super
simple my_readline function using fgets.
Optionally, we can use the GNU readline library.
@@ -364,7 +364,7 @@ PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *) = NULL;


-/* Interface used by tokenizer.c and bltinmodule.c */
+/* Interface used by file_tokenizer.c and bltinmodule.c */

char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
5 changes: 3 additions & 2 deletions Parser/string_parser.c
@@ -14,8 +14,9 @@ static int
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
{
unsigned char c = *first_invalid_escape;
-if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) { // in this case the tokenizer has already emitted a warning,
-// see tokenizer.c:warn_invalid_escape_sequence
+if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) {
+    // in this case the tokenizer has already emitted a warning,
+    // see Parser/tokenizer/helpers.c:warn_invalid_escape_sequence
return 0;
}

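
The early return keeps the warning from being raised twice; a sketch of the observable behavior, assuming an interpreter where invalid escapes inside f-strings raise SyntaxWarning as the comment describes:

import warnings

# '\{' inside an f-string is an invalid escape sequence. The tokenizer warns
# once; the early return above keeps the string parser from warning again.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    compile(r'f"a\{1}"', "<test>", "eval")
print(len(caught))  # expected: 1, not 2
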
2 changes: 1 addition & 1 deletion Python/traceback.c
@@ -32,7 +32,7 @@
#define MAX_FRAME_DEPTH 100
#define MAX_NTHREADS 100

-/* Function from Parser/tokenizer.c */
+/* Function from Parser/tokenizer/file_tokenizer.c */
extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);

/*[clinic input]
4 changes: 2 additions & 2 deletions Tools/c-analyzer/TODO
@@ -428,8 +428,8 @@ Objects/typeobject.c:type_new():PyId___slots__ _Py_IDENTIFIER(
Objects/unicodeobject.c:unicodeiter_reduce():PyId_iter _Py_IDENTIFIER(iter)
Objects/weakrefobject.c:proxy_bytes():PyId___bytes__ _Py_IDENTIFIER(__bytes__)
Objects/weakrefobject.c:weakref_repr():PyId___name__ _Py_IDENTIFIER(__name__)
-Parser/tokenizer.c:fp_setreadl():PyId_open _Py_IDENTIFIER(open)
-Parser/tokenizer.c:fp_setreadl():PyId_readline _Py_IDENTIFIER(readline)
+Parser/tokenizer/file_tokenizer.c:fp_setreadl():PyId_open _Py_IDENTIFIER(open)
+Parser/tokenizer/file_tokenizer.c:fp_setreadl():PyId_readline _Py_IDENTIFIER(readline)
Python/Python-ast.c:ast_type_reduce():PyId___dict__ _Py_IDENTIFIER(__dict__)
Python/Python-ast.c:make_type():PyId___module__ _Py_IDENTIFIER(__module__)
Python/_warnings.c:PyId_stderr _Py_IDENTIFIER(stderr)
11 changes: 10 additions & 1 deletion Tools/peg_generator/pegen/build.py
@@ -123,7 +123,14 @@ def compile_c_extension(
common_sources = [
str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "lexer" / "lexer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "lexer" / "state.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "lexer" / "buffer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "string_tokenizer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "file_tokenizer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "utf8_tokenizer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "readline_tokenizer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "helpers.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
@@ -133,6 +140,8 @@ def compile_c_extension(
include_dirs = [
str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
str(MOD_DIR.parent.parent.parent / "Parser"),
str(MOD_DIR.parent.parent.parent / "Parser" / "lexer"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer"),
]
extension = Extension(
extension_name,
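
With tokenizer.c replaced by the new Parser/lexer and Parser/tokenizer sources, the extension assembled from common_sources links again. The suite this PR fixes can be exercised directly:

import subprocess
import sys

# test_peg_generator compiles a C extension from the grammar using the
# common_sources list above, so it breaks when any of those files moves.
subprocess.run([sys.executable, "-m", "test", "test_peg_generator"], check=True)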