gh-104169: Fix test_peg_generator after tokenizer refactoring (#110727) · python/cpython@17d6554
Commit 17d6554

gh-104169: Fix test_peg_generator after tokenizer refactoring (#110727)
* Fix test_peg_generator after tokenizer refactoring
* Remove references to tokenizer.c in comments etc.
1 parent 2364542 commit 17d6554

File tree

10 files changed (+24, -14 lines)

Lib/test/test_exceptions.py

Lines changed: 1 addition & 1 deletion

@@ -253,7 +253,7 @@ def testSyntaxErrorOffset(self):
         check('try:\n pass\nexcept*:\n pass', 3, 8)
         check('try:\n pass\nexcept*:\n pass\nexcept* ValueError:\n pass', 3, 8)

-        # Errors thrown by tokenizer.c
+        # Errors thrown by the tokenizer
         check('(0x+1)', 1, 3)
         check('x = 0xI', 1, 6)
         check('0010 + 2', 1, 1)
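The line/offset pairs above come straight out of the C tokenizer; a minimal way to see one of these errors from Python (expected values taken from the check above, not independently derived):

    try:
        compile("(0x+1)", "<demo>", "exec")
    except SyntaxError as exc:
        # The tokenizer rejects '0x+' as an invalid hexadecimal literal
        print(exc.lineno, exc.offset)  # 1 3, matching check('(0x+1)', 1, 3)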

Lib/test/test_source_encoding.py

Lines changed: 1 addition & 1 deletion

@@ -255,7 +255,7 @@ class UTF8ValidatorTest(unittest.TestCase):
     def test_invalid_utf8(self):
         # This is a port of test_utf8_decode_invalid_sequences in
         # test_unicode.py to exercise the separate utf8 validator in
-        # Parser/tokenizer.c used when reading source files.
+        # Parser/tokenizer/helpers.c used when reading source files.

         # That file is written using low-level C file I/O, so the only way to
         # test it is to write actual files to disk.
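A minimal sketch of the write-to-disk pattern this test depends on (file name and byte content are illustrative, not taken from the test):

    import os, subprocess, sys, tempfile

    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d, "bad_utf8.py")
        with open(path, "wb") as f:
            f.write(b"x = '\xff'\n")  # 0xff can never occur in valid UTF-8
        proc = subprocess.run([sys.executable, path], capture_output=True)
        print(b"SyntaxError" in proc.stderr)  # True: the validator rejects the file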

Lib/test/test_tokenize.py

Lines changed: 2 additions & 2 deletions

@@ -1435,7 +1435,7 @@ def test_cookie_second_line_empty_first_line(self):
         self.assertEqual(consumed_lines, expected)

     def test_latin1_normalization(self):
-        # See get_normal_name() in tokenizer.c.
+        # See get_normal_name() in Parser/tokenizer/helpers.c.
         encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                      "iso-8859-1-unix", "iso-latin-1-mac")
         for encoding in encodings:
@@ -1460,7 +1460,7 @@ def test_syntaxerror_latin1(self):


     def test_utf8_normalization(self):
-        # See get_normal_name() in tokenizer.c.
+        # See get_normal_name() in Parser/tokenizer/helpers.c.
         encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
         for encoding in encodings:
             for rep in ("-", "_"):
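What the normalization buys in practice: every spelling in the tuples above collapses to one canonical codec name. A quick check (sketch, not taken from the test):

    import io
    import tokenize

    src = b"# -*- coding: latin-1-unix -*-\nx = 1\n"
    enc, _lines = tokenize.detect_encoding(io.BytesIO(src).readline)
    print(enc)  # 'iso-8859-1': the cookie has been normalized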

Lib/tokenize.py

Lines changed: 1 addition & 1 deletion

@@ -298,7 +298,7 @@ def untokenize(iterable):


 def _get_normal_name(orig_enc):
-    """Imitates get_normal_name in tokenizer.c."""
+    """Imitates get_normal_name in Parser/tokenizer/helpers.c."""
     # Only care about the first 12 characters.
     enc = orig_enc[:12].lower().replace("_", "-")
     if enc == "utf-8" or enc.startswith("utf-8-"):
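The pure-Python function mirrors the C helper it names; for instance (interactive sketch using this private helper):

    from tokenize import _get_normal_name

    print(_get_normal_name("UTF_8"))         # 'utf-8'
    print(_get_normal_name("latin-1-unix"))  # 'iso-8859-1'
    print(_get_normal_name("ascii"))         # 'ascii' (unrecognized names pass through)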

Modules/config.c.in

Lines changed: 1 addition & 1 deletion

@@ -45,7 +45,7 @@ struct _inittab _PyImport_Inittab[] = {
     /* This lives in Python/Python-ast.c */
     {"_ast", PyInit__ast},

-    /* This lives in Python/Python-tokenizer.c */
+    /* This lives in Python/Python-tokenize.c */
     {"_tokenize", PyInit__tokenize},

     /* These entries are here for sys.builtin_module_names */
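The inittab entry is what makes _tokenize importable with no shared library involved; a quick sanity check (assumes a standard CPython 3.12+ build):

    import sys
    import _tokenize

    print("_tokenize" in sys.builtin_module_names)  # True
    print(_tokenize.TokenizerIter)  # the C tokenizer iterator wrapped by Lib/tokenize.py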

Parser/myreadline.c

Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@

-/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c.
+/* Readline interface for the tokenizer and [raw_]input() in bltinmodule.c.
    By default, or when stdin is not a tty device, we have a super
    simple my_readline function using fgets.
    Optionally, we can use the GNU readline library.
@@ -364,7 +364,7 @@ PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
 char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *) = NULL;


-/* Interface used by tokenizer.c and bltinmodule.c */
+/* Interface used by file_tokenizer.c and bltinmodule.c */

 char *
 PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
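The function pointer above is the swap point: importing the readline module installs the GNU readline hook, after which interactive input() no longer goes through the fgets fallback (illustrative; needs a tty and a Unix build with readline available):

    import readline  # noqa: F401 -- side effect: sets PyOS_ReadlineFunctionPointer

    line = input("demo> ")  # line editing and history now work at this prompt
    print(line)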

Parser/string_parser.c

Lines changed: 3 additions & 2 deletions

@@ -14,8 +14,9 @@ static int
 warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
 {
     unsigned char c = *first_invalid_escape;
-    if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) { // in this case the tokenizer has already emitted a warning,
-                                                                                           // see tokenizer.c:warn_invalid_escape_sequence
+    if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) {
+        // in this case the tokenizer has already emitted a warning,
+        // see Parser/tokenizer/helpers.c:warn_invalid_escape_sequence
         return 0;
     }
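The guard exists so that '\{' and '\}' in an f-string produce exactly one SyntaxWarning, the tokenizer's, rather than a second one from the string parser. A sketch of observing that (behaviour as of CPython 3.12):

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        compile(r"f'\{1}'", "<demo>", "eval")
    print(len(caught))  # 1: only the tokenizer's warning is emitted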

Python/traceback.c

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@
 #define MAX_FRAME_DEPTH 100
 #define MAX_NTHREADS 100

-/* Function from Parser/tokenizer.c */
+/* Function from Parser/tokenizer/file_tokenizer.c */
 extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);

 /*[clinic input]

Tools/c-analyzer/TODO

Lines changed: 2 additions & 2 deletions

@@ -428,8 +428,8 @@ Objects/typeobject.c:type_new():PyId___slots__ _Py_IDENTIFIER(
 Objects/unicodeobject.c:unicodeiter_reduce():PyId_iter         _Py_IDENTIFIER(iter)
 Objects/weakrefobject.c:proxy_bytes():PyId___bytes__           _Py_IDENTIFIER(__bytes__)
 Objects/weakrefobject.c:weakref_repr():PyId___name__           _Py_IDENTIFIER(__name__)
-Parser/tokenizer.c:fp_setreadl():PyId_open                     _Py_IDENTIFIER(open)
-Parser/tokenizer.c:fp_setreadl():PyId_readline                 _Py_IDENTIFIER(readline)
+Parser/tokenizer/file_tokenizer.c:fp_setreadl():PyId_open      _Py_IDENTIFIER(open)
+Parser/tokenizer/file_tokenizer.c:fp_setreadl():PyId_readline  _Py_IDENTIFIER(readline)
 Python/Python-ast.c:ast_type_reduce():PyId___dict__            _Py_IDENTIFIER(__dict__)
 Python/Python-ast.c:make_type():PyId___module__                _Py_IDENTIFIER(__module__)
 Python/_warnings.c:PyId_stderr                                 _Py_IDENTIFIER(stderr)

Tools/peg_generator/pegen/build.py

Lines changed: 10 additions & 1 deletion

@@ -123,7 +123,14 @@ def compile_c_extension(
     common_sources = [
         str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
         str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
-        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "lexer" / "lexer.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "lexer" / "state.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "lexer" / "buffer.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "string_tokenizer.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "file_tokenizer.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "utf8_tokenizer.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "readline_tokenizer.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "helpers.c"),
         str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
         str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
         str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
@@ -133,6 +140,8 @@ def compile_c_extension(
     include_dirs = [
         str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
         str(MOD_DIR.parent.parent.parent / "Parser"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "lexer"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer"),
     ]
     extension = Extension(
         extension_name,
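This source list is the crux of the fix: the old single Parser/tokenizer.c translation unit is now split across Parser/lexer/ and Parser/tokenizer/, so the test extension must compile every new file. A condensed sketch of the same assembly (MOD_DIR resolved as in pegen/build.py; purely illustrative):

    from pathlib import Path

    MOD_DIR = Path(__file__).resolve().parent  # as in pegen/build.py
    ROOT = MOD_DIR.parent.parent.parent        # the CPython checkout root

    LEXER = [ROOT / "Parser" / "lexer" / f
             for f in ("lexer.c", "state.c", "buffer.c")]
    TOKENIZER = [ROOT / "Parser" / "tokenizer" / f
                 for f in ("string_tokenizer.c", "file_tokenizer.c",
                           "utf8_tokenizer.c", "readline_tokenizer.c", "helpers.c")]
    common_sources = [str(p) for p in LEXER + TOKENIZER]  # plus pegen.c and friends

Running ./python -m test test_peg_generator from the checkout exercises this path end to end.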

0 commit comments